{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7fc99c3",
   "metadata": {},
   "source": [
    "# Use Implict Quantile Network to Play Pong-v4\n",
    "\n",
    "PyTorch version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2b23c9f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import copy\n",
    "import logging\n",
    "import itertools\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "from gym.wrappers.atari_preprocessing import AtariPreprocessing\n",
    "from gym.wrappers.frame_stack import FrameStack\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "torch.manual_seed(0)\n",
    "from torch import nn\n",
    "from torch import optim\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "693bdf1e",
   "metadata": {},
   "source": [
    "Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6ab1cb62",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:01:01 [INFO] env: <AtariPreprocessing<TimeLimit<AtariEnv<PongNoFrameskip-v4>>>>\n",
      "00:01:01 [INFO] action_space: Discrete(6)\n",
      "00:01:01 [INFO] observation_space: : Box([[[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]], [[[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]], (4, 84, 84), uint8)\n",
      "00:01:01 [INFO] reward_range: (-inf, inf)\n",
      "00:01:01 [INFO] metadata: {'render.modes': ['human', 'rgb_array']}\n",
      "00:01:01 [INFO] num_stack: 4\n",
      "00:01:01 [INFO] lz4_compress: False\n",
      "00:01:01 [INFO] frames: deque([], maxlen=4)\n",
      "00:01:01 [INFO] id: PongNoFrameskip-v4\n",
      "00:01:01 [INFO] entry_point: gym.envs.atari:AtariEnv\n",
      "00:01:01 [INFO] reward_threshold: None\n",
      "00:01:01 [INFO] nondeterministic: False\n",
      "00:01:01 [INFO] max_episode_steps: 400000\n",
      "00:01:01 [INFO] _kwargs: {'game': 'pong', 'obs_type': 'image', 'frameskip': 1}\n",
      "00:01:01 [INFO] _env_name: PongNoFrameskip\n"
     ]
    }
   ],
   "source": [
    "env = FrameStack(AtariPreprocessing(gym.make('PongNoFrameskip-v4')),\n",
    "        num_stack=4)\n",
    "env.env.env.unwrapped.np_random.seed(0) # set seed for noops\n",
    "env.env.env.unwrapped.unwrapped.seed(0) # set seed for AtariEnv\n",
    "for key in vars(env):\n",
    "    logging.info('%s: %s', key, vars(env)[key])\n",
    "for key in vars(env.spec):\n",
    "    logging.info('%s: %s', key, vars(env.spec)[key])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3747e919",
   "metadata": {},
   "source": [
    "Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5eb74be3",
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    def __init__(self, capacity):\n",
    "        self.memory = pd.DataFrame(index=range(capacity),\n",
    "                columns=['state', 'action', 'reward', 'next_state', 'done'])\n",
    "        self.i = 0\n",
    "        self.count = 0\n",
    "        self.capacity = capacity\n",
    "\n",
    "    def store(self, *args):\n",
    "        self.memory.loc[self.i] = args\n",
    "        self.i = (self.i + 1) % self.capacity\n",
    "        self.count = min(self.count + 1, self.capacity)\n",
    "\n",
    "    def sample(self, size):\n",
    "        indices = np.random.choice(self.count, size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.memory.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "eed39665",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Net(nn.Module):\n",
    "    def __init__(self, action_n, sample_count, cosine_count=64):\n",
    "        super().__init__()\n",
    "        self.sample_count = sample_count\n",
    "        self.cosine_count = cosine_count\n",
    "        self.conv = nn.Sequential(\n",
    "                nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.ReLU(),\n",
    "                nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),\n",
    "                nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),\n",
    "                nn.Flatten())\n",
    "        self.emb = nn.Sequential(\n",
    "                nn.Linear(in_features=64, out_features=3136), nn.ReLU())\n",
    "        self.fc = nn.Sequential(\n",
    "                nn.Linear(in_features=3136, out_features=512), nn.ReLU(),\n",
    "                nn.Linear(in_features=512, out_features=action_n))\n",
    "\n",
    "    def forward(self, input_tensor, cumprob_tensor):\n",
    "        batch_size = input_tensor.size(0)\n",
    "        logit_tensor = self.conv(input_tensor).unsqueeze(1)\n",
    "        index_tensor = torch.arange(start=1, end=self.cosine_count + 1).view(1,\n",
    "                1, self.cosine_count)\n",
    "        cosine_tensor = torch.cos(index_tensor * np.pi * cumprob_tensor)\n",
    "        emb_tensor = self.emb(cosine_tensor)\n",
    "        prod_tensor = logit_tensor * emb_tensor\n",
    "        output_tensor = self.fc(prod_tensor).transpose(1, 2)\n",
    "        return output_tensor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a2b9a656",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Agent:\n",
    "    def __init__(self, env):\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "        self.epsilon = 1.\n",
    "\n",
    "        self.replayer = DQNReplayer(capacity=100000)\n",
    "\n",
    "        self.sample_count = 8\n",
    "        self.evaluate_net = Net(action_n=self.action_n,\n",
    "                sample_count=self.sample_count)\n",
    "        self.target_net = copy.deepcopy(self.evaluate_net)\n",
    "        self.optimizer = optim.Adam(self.evaluate_net.parameters(), lr=0.0001)\n",
    "        self.loss = nn.SmoothL1Loss(reduction=\"none\")\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        self.mode = mode\n",
    "        if mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        state_tensor = torch.as_tensor(observation,\n",
    "                dtype=torch.float).unsqueeze(0)\n",
    "        cumprod_tensor = torch.rand(1, self.sample_count, 1)\n",
    "        q_component_tensor = self.evaluate_net(state_tensor, cumprod_tensor)\n",
    "        q_tensor = q_component_tensor.mean(2)\n",
    "        action_tensor = q_tensor.argmax(dim=1)\n",
    "        actions = action_tensor.detach().numpy()\n",
    "        action = actions[0]\n",
    "        if self.mode == 'train':\n",
    "            if np.random.rand() < self.epsilon:\n",
    "                action = np.random.randint(0, self.action_n)\n",
    "            \n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                state, _, _, act, next_state, reward, done, _ = \\\n",
    "                        self.trajectory[-8:]\n",
    "                self.replayer.store(state, act, reward, next_state, done)\n",
    "            if self.replayer.count >= 1024 and self.replayer.count % 10 == 0:\n",
    "                self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        pass\n",
    "\n",
    "    def update_net(self, target_net, evaluate_net, learning_rate=0.005):\n",
    "        for target_param, evaluate_param in zip(\n",
    "                target_net.parameters(), evaluate_net.parameters()):\n",
    "            target_param.data.copy_(learning_rate * evaluate_param.data\n",
    "                    + (1 - learning_rate) * target_param.data)\n",
    "\n",
    "    def learn(self):\n",
    "        # replay\n",
    "        batch_size = 32\n",
    "        states, actions, rewards, next_states, dones = \\\n",
    "                self.replayer.sample(batch_size)\n",
    "        state_tensor = torch.as_tensor(states, dtype=torch.float)\n",
    "        reward_tensor = torch.as_tensor(rewards, dtype=torch.float)\n",
    "        done_tensor = torch.as_tensor(dones, dtype=torch.float)\n",
    "        next_state_tensor = torch.as_tensor(next_states, dtype=torch.float)\n",
    "\n",
    "        # calculate target\n",
    "        next_cumprob_tensor = torch.rand(batch_size, self.sample_count, 1)\n",
    "        next_q_component_tensor = self.evaluate_net(next_state_tensor,\n",
    "                next_cumprob_tensor)\n",
    "        next_q_tensor = next_q_component_tensor.mean(2)\n",
    "        next_action_tensor = next_q_tensor.argmax(dim=1)\n",
    "        next_actions = next_action_tensor.detach().numpy()\n",
    "        next_cumprob_tensor = torch.rand(batch_size, self.sample_count, 1)\n",
    "        all_next_q_quantile_tensor = self.target_net(next_state_tensor,\n",
    "                next_cumprob_tensor)\n",
    "        next_q_quantile_tensor = all_next_q_quantile_tensor[\n",
    "                range(batch_size), next_actions, :]\n",
    "        target_quantile_tensor = reward_tensor.reshape(batch_size, 1) \\\n",
    "                + self.gamma * next_q_quantile_tensor \\\n",
    "                * (1. - done_tensor).reshape(-1, 1)\n",
    "\n",
    "        cumprob_tensor = torch.rand(batch_size, self.sample_count, 1)\n",
    "        all_q_quantile_tensor = self.evaluate_net(state_tensor, cumprob_tensor)\n",
    "        q_quantile_tensor = all_q_quantile_tensor[range(batch_size), actions, :]\n",
    "        target_quantile_tensor = target_quantile_tensor.unsqueeze(1)\n",
    "        q_quantile_tensor = q_quantile_tensor.unsqueeze(2)\n",
    "        hubor_loss_tensor = self.loss(target_quantile_tensor, q_quantile_tensor)\n",
    "        comparison_tensor = (target_quantile_tensor <\n",
    "                q_quantile_tensor).detach().float()\n",
    "        quantile_regression_tensor = (cumprob_tensor -\n",
    "                comparison_tensor).abs()\n",
    "        quantile_huber_loss_tensor = (hubor_loss_tensor *\n",
    "                quantile_regression_tensor).sum(-1).mean(1)\n",
    "        loss_tensor = quantile_huber_loss_tensor.mean()\n",
    "        self.optimizer.zero_grad()\n",
    "        loss_tensor.backward()\n",
    "        self.optimizer.step()\n",
    "\n",
    "        self.update_net(self.target_net, self.evaluate_net)\n",
    "\n",
    "        self.epsilon = max(self.epsilon - 1e-5, 0.05)\n",
    "\n",
    "\n",
    "agent = Agent(env)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f02b76e5",
   "metadata": {},
   "source": [
    "Train & Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "23d77d15",
   "metadata": {},
   "outputs": [],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "34ae8868",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:01:02 [INFO] ==== train ====\n",
      "00:01:16 [DEBUG] train episode 0: reward = -19.00, steps = 1010\n",
      "00:01:53 [DEBUG] train episode 1: reward = -21.00, steps = 998\n",
      "00:02:30 [DEBUG] train episode 2: reward = -19.00, steps = 987\n",
      "00:03:02 [DEBUG] train episode 3: reward = -21.00, steps = 848\n",
      "00:03:36 [DEBUG] train episode 4: reward = -21.00, steps = 880\n",
      "00:04:09 [DEBUG] train episode 5: reward = -20.00, steps = 881\n",
      "00:04:45 [DEBUG] train episode 6: reward = -20.00, steps = 943\n",
      "00:05:20 [DEBUG] train episode 7: reward = -21.00, steps = 924\n",
      "00:05:58 [DEBUG] train episode 8: reward = -20.00, steps = 991\n",
      "00:06:31 [DEBUG] train episode 9: reward = -21.00, steps = 819\n",
      "00:07:12 [DEBUG] train episode 10: reward = -20.00, steps = 1033\n",
      "00:07:45 [DEBUG] train episode 11: reward = -21.00, steps = 806\n",
      "00:08:18 [DEBUG] train episode 12: reward = -21.00, steps = 825\n",
      "00:08:56 [DEBUG] train episode 13: reward = -20.00, steps = 969\n",
      "00:09:30 [DEBUG] train episode 14: reward = -21.00, steps = 851\n",
      "00:10:06 [DEBUG] train episode 15: reward = -21.00, steps = 899\n",
      "00:10:43 [DEBUG] train episode 16: reward = -20.00, steps = 917\n",
      "00:11:18 [DEBUG] train episode 17: reward = -21.00, steps = 877\n",
      "00:11:52 [DEBUG] train episode 18: reward = -21.00, steps = 879\n",
      "00:12:38 [DEBUG] train episode 19: reward = -18.00, steps = 1160\n",
      "00:13:09 [DEBUG] train episode 20: reward = -21.00, steps = 777\n",
      "00:13:43 [DEBUG] train episode 21: reward = -20.00, steps = 864\n",
      "00:14:18 [DEBUG] train episode 22: reward = -20.00, steps = 865\n",
      "00:14:56 [DEBUG] train episode 23: reward = -20.00, steps = 959\n",
      "00:15:29 [DEBUG] train episode 24: reward = -21.00, steps = 821\n",
      "00:16:03 [DEBUG] train episode 25: reward = -21.00, steps = 852\n",
      "00:16:42 [DEBUG] train episode 26: reward = -20.00, steps = 973\n",
      "00:17:26 [DEBUG] train episode 27: reward = -18.00, steps = 1117\n",
      "00:18:01 [DEBUG] train episode 28: reward = -20.00, steps = 878\n",
      "00:18:31 [DEBUG] train episode 29: reward = -21.00, steps = 786\n",
      "00:19:16 [DEBUG] train episode 30: reward = -20.00, steps = 1139\n",
      "00:19:53 [DEBUG] train episode 31: reward = -21.00, steps = 942\n",
      "00:20:33 [DEBUG] train episode 32: reward = -21.00, steps = 991\n",
      "00:21:06 [DEBUG] train episode 33: reward = -21.00, steps = 820\n",
      "00:21:41 [DEBUG] train episode 34: reward = -20.00, steps = 904\n",
      "00:22:23 [DEBUG] train episode 35: reward = -19.00, steps = 1063\n",
      "00:22:57 [DEBUG] train episode 36: reward = -21.00, steps = 884\n",
      "00:23:39 [DEBUG] train episode 37: reward = -19.00, steps = 1066\n",
      "00:24:19 [DEBUG] train episode 38: reward = -20.00, steps = 999\n",
      "00:24:59 [DEBUG] train episode 39: reward = -19.00, steps = 1036\n",
      "00:25:30 [DEBUG] train episode 40: reward = -21.00, steps = 790\n",
      "00:26:03 [DEBUG] train episode 41: reward = -21.00, steps = 824\n",
      "00:26:42 [DEBUG] train episode 42: reward = -20.00, steps = 987\n",
      "00:27:19 [DEBUG] train episode 43: reward = -21.00, steps = 937\n",
      "00:27:52 [DEBUG] train episode 44: reward = -20.00, steps = 835\n",
      "00:28:24 [DEBUG] train episode 45: reward = -21.00, steps = 823\n",
      "00:28:56 [DEBUG] train episode 46: reward = -21.00, steps = 823\n",
      "00:29:35 [DEBUG] train episode 47: reward = -19.00, steps = 997\n",
      "00:30:12 [DEBUG] train episode 48: reward = -20.00, steps = 938\n",
      "00:30:46 [DEBUG] train episode 49: reward = -20.00, steps = 866\n",
      "00:31:16 [DEBUG] train episode 50: reward = -21.00, steps = 760\n",
      "00:31:50 [DEBUG] train episode 51: reward = -21.00, steps = 866\n",
      "00:32:27 [DEBUG] train episode 52: reward = -20.00, steps = 929\n",
      "00:33:01 [DEBUG] train episode 53: reward = -20.00, steps = 864\n",
      "00:33:40 [DEBUG] train episode 54: reward = -21.00, steps = 984\n",
      "00:34:14 [DEBUG] train episode 55: reward = -21.00, steps = 882\n",
      "00:34:46 [DEBUG] train episode 56: reward = -21.00, steps = 809\n",
      "00:35:20 [DEBUG] train episode 57: reward = -21.00, steps = 850\n",
      "00:35:52 [DEBUG] train episode 58: reward = -21.00, steps = 815\n",
      "00:36:26 [DEBUG] train episode 59: reward = -21.00, steps = 851\n",
      "00:37:05 [DEBUG] train episode 60: reward = -21.00, steps = 995\n",
      "00:37:42 [DEBUG] train episode 61: reward = -20.00, steps = 947\n",
      "00:38:18 [DEBUG] train episode 62: reward = -21.00, steps = 896\n",
      "00:38:53 [DEBUG] train episode 63: reward = -21.00, steps = 896\n",
      "00:39:32 [DEBUG] train episode 64: reward = -20.00, steps = 977\n",
      "00:40:11 [DEBUG] train episode 65: reward = -21.00, steps = 971\n",
      "00:40:46 [DEBUG] train episode 66: reward = -21.00, steps = 883\n",
      "00:41:22 [DEBUG] train episode 67: reward = -21.00, steps = 877\n",
      "00:41:57 [DEBUG] train episode 68: reward = -21.00, steps = 885\n",
      "00:42:30 [DEBUG] train episode 69: reward = -21.00, steps = 847\n",
      "00:43:05 [DEBUG] train episode 70: reward = -21.00, steps = 865\n",
      "00:43:38 [DEBUG] train episode 71: reward = -21.00, steps = 817\n",
      "00:44:09 [DEBUG] train episode 72: reward = -21.00, steps = 786\n",
      "00:44:39 [DEBUG] train episode 73: reward = -21.00, steps = 782\n",
      "00:45:10 [DEBUG] train episode 74: reward = -21.00, steps = 780\n",
      "00:45:47 [DEBUG] train episode 75: reward = -19.00, steps = 938\n",
      "00:46:23 [DEBUG] train episode 76: reward = -21.00, steps = 908\n",
      "00:47:00 [DEBUG] train episode 77: reward = -20.00, steps = 947\n",
      "00:47:35 [DEBUG] train episode 78: reward = -21.00, steps = 862\n",
      "00:48:13 [DEBUG] train episode 79: reward = -20.00, steps = 986\n",
      "00:48:51 [DEBUG] train episode 80: reward = -19.00, steps = 961\n",
      "00:49:24 [DEBUG] train episode 81: reward = -20.00, steps = 841\n",
      "00:50:03 [DEBUG] train episode 82: reward = -19.00, steps = 977\n",
      "00:50:34 [DEBUG] train episode 83: reward = -21.00, steps = 789\n",
      "00:51:16 [DEBUG] train episode 84: reward = -18.00, steps = 1067\n",
      "00:51:49 [DEBUG] train episode 85: reward = -21.00, steps = 839\n",
      "00:52:32 [DEBUG] train episode 86: reward = -19.00, steps = 1095\n",
      "00:53:05 [DEBUG] train episode 87: reward = -21.00, steps = 850\n",
      "00:53:49 [DEBUG] train episode 88: reward = -21.00, steps = 1106\n",
      "00:54:19 [DEBUG] train episode 89: reward = -21.00, steps = 776\n",
      "00:54:56 [DEBUG] train episode 90: reward = -20.00, steps = 926\n",
      "00:55:36 [DEBUG] train episode 91: reward = -18.00, steps = 1026\n",
      "00:56:12 [DEBUG] train episode 92: reward = -21.00, steps = 927\n",
      "00:56:55 [DEBUG] train episode 93: reward = -20.00, steps = 1097\n",
      "00:57:27 [DEBUG] train episode 94: reward = -21.00, steps = 821\n",
      "00:58:03 [DEBUG] train episode 95: reward = -21.00, steps = 925\n",
      "00:58:38 [DEBUG] train episode 96: reward = -20.00, steps = 899\n",
      "00:59:13 [DEBUG] train episode 97: reward = -21.00, steps = 907\n",
      "00:59:54 [DEBUG] train episode 98: reward = -21.00, steps = 1057\n",
      "01:00:29 [DEBUG] train episode 99: reward = -21.00, steps = 908\n",
      "01:01:06 [DEBUG] train episode 100: reward = -19.00, steps = 942\n",
      "01:01:43 [DEBUG] train episode 101: reward = -20.00, steps = 963\n",
      "01:02:21 [DEBUG] train episode 102: reward = -21.00, steps = 970\n",
      "01:02:59 [DEBUG] train episode 103: reward = -21.00, steps = 989\n",
      "01:03:32 [DEBUG] train episode 104: reward = -20.00, steps = 839\n",
      "01:04:03 [DEBUG] train episode 105: reward = -21.00, steps = 788\n",
      "01:04:44 [DEBUG] train episode 106: reward = -19.00, steps = 1060\n",
      "01:05:17 [DEBUG] train episode 107: reward = -21.00, steps = 851\n",
      "01:05:49 [DEBUG] train episode 108: reward = -21.00, steps = 824\n",
      "01:08:00 [DEBUG] train episode 109: reward = -21.00, steps = 992\n",
      "01:12:43 [DEBUG] train episode 110: reward = -17.00, steps = 1230\n",
      "01:17:14 [DEBUG] train episode 111: reward = -19.00, steps = 1174\n",
      "01:21:43 [DEBUG] train episode 112: reward = -19.00, steps = 1150\n",
      "01:25:13 [DEBUG] train episode 113: reward = -20.00, steps = 908\n",
      "01:29:21 [DEBUG] train episode 114: reward = -19.00, steps = 1067\n",
      "01:32:23 [DEBUG] train episode 115: reward = -21.00, steps = 781\n",
      "01:36:29 [DEBUG] train episode 116: reward = -19.00, steps = 1058\n",
      "01:39:40 [DEBUG] train episode 117: reward = -21.00, steps = 825\n",
      "01:42:43 [DEBUG] train episode 118: reward = -21.00, steps = 787\n",
      "01:47:57 [DEBUG] train episode 119: reward = -16.00, steps = 1356\n",
      "01:51:49 [DEBUG] train episode 120: reward = -21.00, steps = 998\n",
      "01:54:59 [DEBUG] train episode 121: reward = -21.00, steps = 818\n",
      "01:58:06 [DEBUG] train episode 122: reward = -21.00, steps = 807\n",
      "02:01:40 [DEBUG] train episode 123: reward = -20.00, steps = 924\n",
      "02:05:13 [DEBUG] train episode 124: reward = -21.00, steps = 919\n",
      "02:08:50 [DEBUG] train episode 125: reward = -19.00, steps = 938\n",
      "02:13:40 [DEBUG] train episode 126: reward = -19.00, steps = 1255\n",
      "02:18:06 [DEBUG] train episode 127: reward = -20.00, steps = 1147\n",
      "02:23:05 [DEBUG] train episode 128: reward = -18.00, steps = 1283\n",
      "02:28:26 [DEBUG] train episode 129: reward = -20.00, steps = 1391\n",
      "02:33:49 [DEBUG] train episode 130: reward = -16.00, steps = 1393\n",
      "02:37:48 [DEBUG] train episode 131: reward = -20.00, steps = 1038\n",
      "02:43:50 [DEBUG] train episode 132: reward = -18.00, steps = 1577\n",
      "02:50:02 [DEBUG] train episode 133: reward = -19.00, steps = 1617\n",
      "02:55:08 [DEBUG] train episode 134: reward = -19.00, steps = 1332\n",
      "02:59:23 [DEBUG] train episode 135: reward = -19.00, steps = 1114\n",
      "03:04:15 [DEBUG] train episode 136: reward = -20.00, steps = 1279\n",
      "03:08:20 [DEBUG] train episode 137: reward = -20.00, steps = 1071\n",
      "03:14:08 [DEBUG] train episode 138: reward = -16.00, steps = 1528\n",
      "03:19:22 [DEBUG] train episode 139: reward = -18.00, steps = 1379\n",
      "03:25:01 [DEBUG] train episode 140: reward = -17.00, steps = 1480\n",
      "03:29:53 [DEBUG] train episode 141: reward = -20.00, steps = 1284\n",
      "03:35:27 [DEBUG] train episode 142: reward = -19.00, steps = 1472\n",
      "03:40:10 [DEBUG] train episode 143: reward = -20.00, steps = 1253\n",
      "03:45:41 [DEBUG] train episode 144: reward = -17.00, steps = 1469\n",
      "03:49:55 [DEBUG] train episode 145: reward = -20.00, steps = 1125\n",
      "03:54:47 [DEBUG] train episode 146: reward = -20.00, steps = 1295\n",
      "03:59:57 [DEBUG] train episode 147: reward = -19.00, steps = 1381\n",
      "04:05:16 [DEBUG] train episode 148: reward = -19.00, steps = 1368\n",
      "04:10:10 [DEBUG] train episode 149: reward = -21.00, steps = 1309\n",
      "04:16:40 [DEBUG] train episode 150: reward = -18.00, steps = 1740\n",
      "04:23:01 [DEBUG] train episode 151: reward = -21.00, steps = 1696\n",
      "04:28:02 [DEBUG] train episode 152: reward = -21.00, steps = 1348\n",
      "04:33:22 [DEBUG] train episode 153: reward = -19.00, steps = 1427\n",
      "04:40:08 [DEBUG] train episode 154: reward = -17.00, steps = 1832\n",
      "04:48:20 [DEBUG] train episode 155: reward = -15.00, steps = 2212\n",
      "04:54:04 [DEBUG] train episode 156: reward = -18.00, steps = 1550\n",
      "05:00:19 [DEBUG] train episode 157: reward = -17.00, steps = 1684\n",
      "05:06:23 [DEBUG] train episode 158: reward = -18.00, steps = 1624\n",
      "05:11:37 [DEBUG] train episode 159: reward = -19.00, steps = 1384\n",
      "05:18:39 [DEBUG] train episode 160: reward = -15.00, steps = 1857\n",
      "05:25:22 [DEBUG] train episode 161: reward = -17.00, steps = 1756\n",
      "05:31:13 [DEBUG] train episode 162: reward = -20.00, steps = 1531\n",
      "05:38:21 [DEBUG] train episode 163: reward = -15.00, steps = 1874\n",
      "05:46:18 [DEBUG] train episode 164: reward = -15.00, steps = 2089\n",
      "05:53:03 [DEBUG] train episode 165: reward = -14.00, steps = 1784\n",
      "06:02:27 [DEBUG] train episode 166: reward = -8.00, steps = 2475\n",
      "06:10:22 [DEBUG] train episode 167: reward = -13.00, steps = 2094\n",
      "06:17:06 [DEBUG] train episode 168: reward = -18.00, steps = 1777\n",
      "06:27:33 [DEBUG] train episode 169: reward = -8.00, steps = 2764\n",
      "06:33:12 [DEBUG] train episode 170: reward = -17.00, steps = 1489\n",
      "06:39:08 [DEBUG] train episode 171: reward = -17.00, steps = 1565\n",
      "06:45:55 [DEBUG] train episode 172: reward = -15.00, steps = 1793\n",
      "06:55:21 [DEBUG] train episode 173: reward = -9.00, steps = 2487\n",
      "07:02:53 [DEBUG] train episode 174: reward = -13.00, steps = 1994\n",
      "07:09:25 [DEBUG] train episode 175: reward = -14.00, steps = 1722\n",
      "07:17:58 [DEBUG] train episode 176: reward = -8.00, steps = 2260\n",
      "07:27:16 [DEBUG] train episode 177: reward = -5.00, steps = 2458\n",
      "07:32:49 [DEBUG] train episode 178: reward = -16.00, steps = 1462\n",
      "07:38:38 [DEBUG] train episode 179: reward = -15.00, steps = 1536\n",
      "07:45:17 [DEBUG] train episode 180: reward = -15.00, steps = 1750\n",
      "07:53:46 [DEBUG] train episode 181: reward = -7.00, steps = 2240\n",
      "08:03:30 [DEBUG] train episode 182: reward = -7.00, steps = 2569\n",
      "08:16:16 [DEBUG] train episode 183: reward = -3.00, steps = 3346\n",
      "08:26:03 [DEBUG] train episode 184: reward = -4.00, steps = 2609\n",
      "08:33:30 [DEBUG] train episode 185: reward = -9.00, steps = 1992\n",
      "08:40:22 [DEBUG] train episode 186: reward = -11.00, steps = 1843\n",
      "08:46:30 [DEBUG] train episode 187: reward = -14.00, steps = 1647\n",
      "08:52:25 [DEBUG] train episode 188: reward = -14.00, steps = 1587\n",
      "08:56:32 [DEBUG] train episode 189: reward = -19.00, steps = 1106\n",
      "09:06:26 [DEBUG] train episode 190: reward = -5.00, steps = 2545\n",
      "09:13:08 [DEBUG] train episode 191: reward = -11.00, steps = 1818\n",
      "09:22:18 [DEBUG] train episode 192: reward = -6.00, steps = 2472\n",
      "09:29:00 [DEBUG] train episode 193: reward = -13.00, steps = 1810\n",
      "09:37:50 [DEBUG] train episode 194: reward = -8.00, steps = 2387\n",
      "09:44:21 [DEBUG] train episode 195: reward = -14.00, steps = 1754\n",
      "09:53:44 [DEBUG] train episode 196: reward = -6.00, steps = 2533\n",
      "10:03:03 [DEBUG] train episode 197: reward = -4.00, steps = 2511\n",
      "10:11:45 [DEBUG] train episode 198: reward = -8.00, steps = 2351\n",
      "10:18:14 [DEBUG] train episode 199: reward = -13.00, steps = 1755\n",
      "10:27:32 [DEBUG] train episode 200: reward = -7.00, steps = 2485\n",
      "10:34:10 [DEBUG] train episode 201: reward = -12.00, steps = 1789\n",
      "10:44:14 [DEBUG] train episode 202: reward = -4.00, steps = 2719\n",
      "10:50:20 [DEBUG] train episode 203: reward = -12.00, steps = 1652\n",
      "10:57:59 [DEBUG] train episode 204: reward = -10.00, steps = 2065\n",
      "11:02:51 [DEBUG] train episode 205: reward = -17.00, steps = 1313\n",
      "11:11:48 [DEBUG] train episode 206: reward = -7.00, steps = 2423\n",
      "11:21:12 [DEBUG] train episode 207: reward = -7.00, steps = 2535\n",
      "11:28:05 [DEBUG] train episode 208: reward = -12.00, steps = 1865\n",
      "11:33:23 [DEBUG] train episode 209: reward = -16.00, steps = 1432\n",
      "11:38:41 [DEBUG] train episode 210: reward = -15.00, steps = 1441\n",
      "11:48:28 [DEBUG] train episode 211: reward = -4.00, steps = 2664\n",
      "11:54:04 [DEBUG] train episode 212: reward = -15.00, steps = 1527\n",
      "12:03:45 [DEBUG] train episode 213: reward = -6.00, steps = 2640\n",
      "12:09:56 [DEBUG] train episode 214: reward = -14.00, steps = 1688\n",
      "12:15:53 [DEBUG] train episode 215: reward = -13.00, steps = 1627\n",
      "12:24:53 [DEBUG] train episode 216: reward = -7.00, steps = 2441\n",
      "12:31:15 [DEBUG] train episode 217: reward = -13.00, steps = 1731\n",
      "12:38:12 [DEBUG] train episode 218: reward = -12.00, steps = 1895\n",
      "12:45:46 [DEBUG] train episode 219: reward = -10.00, steps = 2062\n",
      "12:53:48 [DEBUG] train episode 220: reward = -8.00, steps = 2192\n",
      "13:02:22 [DEBUG] train episode 221: reward = -4.00, steps = 2350\n",
      "13:08:46 [DEBUG] train episode 222: reward = -11.00, steps = 1751\n",
      "13:15:57 [DEBUG] train episode 223: reward = -11.00, steps = 1969\n",
      "13:22:44 [DEBUG] train episode 224: reward = -13.00, steps = 1848\n",
      "13:30:07 [DEBUG] train episode 225: reward = -9.00, steps = 2022\n",
      "13:39:23 [DEBUG] train episode 226: reward = -6.00, steps = 2545\n",
      "13:47:45 [DEBUG] train episode 227: reward = -8.00, steps = 2300\n",
      "13:55:39 [DEBUG] train episode 228: reward = -8.00, steps = 2178\n",
      "14:01:37 [DEBUG] train episode 229: reward = -14.00, steps = 1636\n",
      "14:09:04 [DEBUG] train episode 230: reward = -8.00, steps = 2048\n",
      "14:16:44 [DEBUG] train episode 231: reward = -9.00, steps = 2111\n",
      "14:26:00 [DEBUG] train episode 232: reward = -5.00, steps = 2545\n",
      "14:33:34 [DEBUG] train episode 233: reward = -9.00, steps = 2081\n",
      "14:38:49 [DEBUG] train episode 234: reward = -16.00, steps = 1437\n",
      "14:47:27 [DEBUG] train episode 235: reward = -3.00, steps = 2377\n",
      "14:55:44 [DEBUG] train episode 236: reward = -7.00, steps = 2286\n",
      "15:01:07 [DEBUG] train episode 237: reward = -14.00, steps = 1486\n",
      "15:09:14 [DEBUG] train episode 238: reward = -8.00, steps = 2237\n",
      "15:16:58 [DEBUG] train episode 239: reward = -7.00, steps = 2140\n",
      "15:23:32 [DEBUG] train episode 240: reward = -11.00, steps = 1806\n",
      "15:32:16 [DEBUG] train episode 241: reward = -7.00, steps = 2414\n",
      "15:40:12 [DEBUG] train episode 242: reward = -10.00, steps = 2193\n",
      "15:50:41 [DEBUG] train episode 243: reward = -3.00, steps = 2900\n",
      "16:01:38 [DEBUG] train episode 244: reward = 1.00, steps = 3027\n",
      "16:12:12 [DEBUG] train episode 245: reward = 2.00, steps = 2924\n",
      "16:22:00 [DEBUG] train episode 246: reward = 5.00, steps = 2698\n",
      "16:30:19 [DEBUG] train episode 247: reward = -6.00, steps = 2307\n",
      "16:39:20 [DEBUG] train episode 248: reward = -3.00, steps = 2501\n",
      "16:46:44 [DEBUG] train episode 249: reward = -8.00, steps = 2050\n",
      "16:56:31 [DEBUG] train episode 250: reward = 7.00, steps = 2715\n",
      "17:04:06 [DEBUG] train episode 251: reward = 14.00, steps = 2106\n",
      "17:12:43 [DEBUG] train episode 252: reward = -4.00, steps = 2396\n",
      "17:20:36 [DEBUG] train episode 253: reward = 11.00, steps = 2190\n",
      "17:28:15 [DEBUG] train episode 254: reward = 14.00, steps = 2112\n",
      "17:37:49 [DEBUG] train episode 255: reward = 5.00, steps = 2654\n",
      "17:45:13 [DEBUG] train episode 256: reward = 15.00, steps = 2059\n",
      "17:52:22 [DEBUG] train episode 257: reward = 15.00, steps = 1996\n",
      "18:00:37 [DEBUG] train episode 258: reward = 13.00, steps = 2299\n",
      "18:09:32 [DEBUG] train episode 259: reward = 9.00, steps = 2479\n",
      "18:17:42 [DEBUG] train episode 260: reward = 10.00, steps = 2273\n",
      "18:26:23 [DEBUG] train episode 261: reward = -1.00, steps = 2420\n",
      "18:33:13 [DEBUG] train episode 262: reward = 16.00, steps = 1898\n",
      "18:41:09 [DEBUG] train episode 263: reward = 10.00, steps = 2214\n",
      "18:47:55 [DEBUG] train episode 264: reward = 17.00, steps = 1892\n",
      "18:55:56 [DEBUG] train episode 265: reward = 14.00, steps = 2238\n",
      "19:03:47 [DEBUG] train episode 266: reward = 9.00, steps = 2189\n",
      "19:10:55 [DEBUG] train episode 267: reward = 15.00, steps = 1988\n",
      "19:19:26 [DEBUG] train episode 268: reward = 1.00, steps = 2371\n",
      "19:28:20 [DEBUG] train episode 269: reward = 7.00, steps = 2471\n",
      "19:36:47 [DEBUG] train episode 270: reward = -5.00, steps = 2360\n",
      "19:43:53 [DEBUG] train episode 271: reward = 16.00, steps = 1978\n",
      "19:53:39 [DEBUG] train episode 272: reward = 1.00, steps = 2621\n",
      "20:01:39 [DEBUG] train episode 273: reward = 16.00, steps = 2120\n",
      "20:11:26 [DEBUG] train episode 274: reward = -5.00, steps = 2511\n",
      "20:18:54 [DEBUG] train episode 275: reward = 17.00, steps = 1895\n",
      "20:26:55 [DEBUG] train episode 276: reward = 16.00, steps = 1982\n",
      "20:35:59 [DEBUG] train episode 277: reward = 12.00, steps = 2214\n",
      "20:48:07 [DEBUG] train episode 278: reward = 5.00, steps = 2548\n",
      "20:56:10 [DEBUG] train episode 279: reward = 16.00, steps = 1904\n",
      "21:04:37 [DEBUG] train episode 280: reward = 14.00, steps = 2015\n",
      "21:14:03 [DEBUG] train episode 281: reward = 12.00, steps = 2202\n",
      "21:24:20 [DEBUG] train episode 282: reward = 7.00, steps = 2563\n",
      "21:31:39 [DEBUG] train episode 283: reward = 18.00, steps = 1840\n",
      "21:39:22 [DEBUG] train episode 284: reward = 15.00, steps = 1983\n",
      "21:47:20 [DEBUG] train episode 285: reward = 11.00, steps = 2054\n",
      "21:54:58 [DEBUG] train episode 286: reward = 17.00, steps = 1934\n",
      "22:03:09 [DEBUG] train episode 287: reward = 15.00, steps = 2069\n",
      "22:10:30 [DEBUG] train episode 288: reward = 17.00, steps = 1851\n",
      "22:19:17 [DEBUG] train episode 289: reward = 12.00, steps = 2190\n",
      "22:27:54 [DEBUG] train episode 290: reward = 14.00, steps = 2149\n",
      "22:37:09 [DEBUG] train episode 291: reward = 9.00, steps = 2289\n",
      "22:45:05 [DEBUG] train episode 292: reward = 13.00, steps = 2046\n",
      "22:52:17 [DEBUG] train episode 293: reward = 17.00, steps = 1820\n",
      "23:01:21 [DEBUG] train episode 294: reward = 16.00, steps = 2217\n",
      "23:08:47 [DEBUG] train episode 295: reward = 18.00, steps = 1843\n",
      "23:16:19 [DEBUG] train episode 296: reward = 17.00, steps = 1865\n",
      "23:16:19 [INFO] ==== test ====\n",
      "23:16:43 [DEBUG] test episode 0: reward = 20.00, steps = 1664\n",
      "23:17:09 [DEBUG] test episode 1: reward = 20.00, steps = 1732\n",
      "23:17:33 [DEBUG] test episode 2: reward = 19.00, steps = 1700\n",
      "23:17:59 [DEBUG] test episode 3: reward = 19.00, steps = 1787\n",
      "23:18:25 [DEBUG] test episode 4: reward = 20.00, steps = 1664\n",
      "23:18:59 [DEBUG] test episode 5: reward = 20.00, steps = 1665\n",
      "23:19:37 [DEBUG] test episode 6: reward = 19.00, steps = 1765\n",
      "23:20:10 [DEBUG] test episode 7: reward = 19.00, steps = 1696\n",
      "23:20:39 [DEBUG] test episode 8: reward = 19.00, steps = 1697\n",
      "23:21:07 [DEBUG] test episode 9: reward = 20.00, steps = 1725\n",
      "23:21:32 [DEBUG] test episode 10: reward = 20.00, steps = 1662\n",
      "23:21:57 [DEBUG] test episode 11: reward = 19.00, steps = 1740\n",
      "23:22:21 [DEBUG] test episode 12: reward = 19.00, steps = 1699\n",
      "23:22:46 [DEBUG] test episode 13: reward = 19.00, steps = 1700\n",
      "23:23:12 [DEBUG] test episode 14: reward = 19.00, steps = 1763\n",
      "23:23:37 [DEBUG] test episode 15: reward = 20.00, steps = 1728\n",
      "23:24:03 [DEBUG] test episode 16: reward = 19.00, steps = 1745\n",
      "23:24:31 [DEBUG] test episode 17: reward = 19.00, steps = 1726\n",
      "23:24:57 [DEBUG] test episode 18: reward = 19.00, steps = 1702\n",
      "23:25:22 [DEBUG] test episode 19: reward = 19.00, steps = 1736\n",
      "23:25:46 [DEBUG] test episode 20: reward = 19.00, steps = 1699\n",
      "23:26:11 [DEBUG] test episode 21: reward = 19.00, steps = 1706\n",
      "23:26:36 [DEBUG] test episode 22: reward = 19.00, steps = 1699\n",
      "23:27:00 [DEBUG] test episode 23: reward = 20.00, steps = 1670\n",
      "23:27:25 [DEBUG] test episode 24: reward = 19.00, steps = 1702\n",
      "23:27:51 [DEBUG] test episode 25: reward = 19.00, steps = 1699\n",
      "23:28:16 [DEBUG] test episode 26: reward = 19.00, steps = 1766\n",
      "23:28:42 [DEBUG] test episode 27: reward = 20.00, steps = 1716\n",
      "23:29:08 [DEBUG] test episode 28: reward = 20.00, steps = 1661\n",
      "23:29:33 [DEBUG] test episode 29: reward = 19.00, steps = 1762\n",
      "23:30:01 [DEBUG] test episode 30: reward = 19.00, steps = 1741\n",
      "23:30:29 [DEBUG] test episode 31: reward = 19.00, steps = 1846\n",
      "23:30:53 [DEBUG] test episode 32: reward = 20.00, steps = 1662\n",
      "23:31:20 [DEBUG] test episode 33: reward = 19.00, steps = 1821\n",
      "23:31:46 [DEBUG] test episode 34: reward = 19.00, steps = 1706\n",
      "23:32:11 [DEBUG] test episode 35: reward = 19.00, steps = 1702\n",
      "23:32:36 [DEBUG] test episode 36: reward = 19.00, steps = 1761\n",
      "23:33:00 [DEBUG] test episode 37: reward = 19.00, steps = 1702\n",
      "23:33:24 [DEBUG] test episode 38: reward = 20.00, steps = 1666\n",
      "23:33:49 [DEBUG] test episode 39: reward = 19.00, steps = 1759\n",
      "23:34:13 [DEBUG] test episode 40: reward = 20.00, steps = 1665\n",
      "23:34:39 [DEBUG] test episode 41: reward = 19.00, steps = 1759\n",
      "23:35:04 [DEBUG] test episode 42: reward = 19.00, steps = 1746\n",
      "23:35:28 [DEBUG] test episode 43: reward = 19.00, steps = 1700\n",
      "23:35:54 [DEBUG] test episode 44: reward = 19.00, steps = 1768\n",
      "23:36:21 [DEBUG] test episode 45: reward = 19.00, steps = 1878\n",
      "23:36:46 [DEBUG] test episode 46: reward = 19.00, steps = 1762\n",
      "23:37:11 [DEBUG] test episode 47: reward = 20.00, steps = 1670\n",
      "23:37:35 [DEBUG] test episode 48: reward = 19.00, steps = 1698\n",
      "23:37:59 [DEBUG] test episode 49: reward = 20.00, steps = 1666\n",
      "23:38:24 [DEBUG] test episode 50: reward = 19.00, steps = 1763\n",
      "23:38:49 [DEBUG] test episode 51: reward = 19.00, steps = 1729\n",
      "23:39:13 [DEBUG] test episode 52: reward = 19.00, steps = 1701\n",
      "23:39:38 [DEBUG] test episode 53: reward = 19.00, steps = 1723\n",
      "23:40:02 [DEBUG] test episode 54: reward = 20.00, steps = 1661\n",
      "23:40:26 [DEBUG] test episode 55: reward = 20.00, steps = 1662\n",
      "23:40:50 [DEBUG] test episode 56: reward = 20.00, steps = 1660\n",
      "23:41:14 [DEBUG] test episode 57: reward = 20.00, steps = 1661\n",
      "23:41:40 [DEBUG] test episode 58: reward = 19.00, steps = 1825\n",
      "23:42:04 [DEBUG] test episode 59: reward = 19.00, steps = 1701\n",
      "23:42:29 [DEBUG] test episode 60: reward = 19.00, steps = 1759\n",
      "23:42:54 [DEBUG] test episode 61: reward = 20.00, steps = 1660\n",
      "23:43:21 [DEBUG] test episode 62: reward = 19.00, steps = 1766\n",
      "23:43:47 [DEBUG] test episode 63: reward = 19.00, steps = 1702\n",
      "23:44:11 [DEBUG] test episode 64: reward = 20.00, steps = 1662\n",
      "23:44:35 [DEBUG] test episode 65: reward = 20.00, steps = 1728\n",
      "23:44:59 [DEBUG] test episode 66: reward = 20.00, steps = 1670\n",
      "23:45:23 [DEBUG] test episode 67: reward = 19.00, steps = 1703\n",
      "23:45:48 [DEBUG] test episode 68: reward = 19.00, steps = 1800\n",
      "23:46:13 [DEBUG] test episode 69: reward = 20.00, steps = 1724\n",
      "23:46:37 [DEBUG] test episode 70: reward = 19.00, steps = 1700\n",
      "23:47:01 [DEBUG] test episode 71: reward = 20.00, steps = 1666\n",
      "23:47:25 [DEBUG] test episode 72: reward = 20.00, steps = 1668\n",
      "23:47:48 [DEBUG] test episode 73: reward = 20.00, steps = 1670\n",
      "23:48:12 [DEBUG] test episode 74: reward = 20.00, steps = 1666\n",
      "23:48:36 [DEBUG] test episode 75: reward = 20.00, steps = 1670\n",
      "23:49:00 [DEBUG] test episode 76: reward = 19.00, steps = 1702\n",
      "23:49:24 [DEBUG] test episode 77: reward = 20.00, steps = 1664\n",
      "23:49:48 [DEBUG] test episode 78: reward = 20.00, steps = 1660\n",
      "23:50:12 [DEBUG] test episode 79: reward = 19.00, steps = 1704\n",
      "23:50:37 [DEBUG] test episode 80: reward = 19.00, steps = 1701\n",
      "23:51:02 [DEBUG] test episode 81: reward = 19.00, steps = 1702\n",
      "23:51:27 [DEBUG] test episode 82: reward = 19.00, steps = 1759\n",
      "23:51:53 [DEBUG] test episode 83: reward = 19.00, steps = 1780\n",
      "23:52:17 [DEBUG] test episode 84: reward = 20.00, steps = 1664\n",
      "23:52:42 [DEBUG] test episode 85: reward = 19.00, steps = 1763\n",
      "23:53:06 [DEBUG] test episode 86: reward = 19.00, steps = 1704\n",
      "23:53:31 [DEBUG] test episode 87: reward = 20.00, steps = 1728\n",
      "23:53:55 [DEBUG] test episode 88: reward = 20.00, steps = 1664\n",
      "23:54:21 [DEBUG] test episode 89: reward = 19.00, steps = 1759\n",
      "23:54:45 [DEBUG] test episode 90: reward = 20.00, steps = 1665\n",
      "23:55:10 [DEBUG] test episode 91: reward = 19.00, steps = 1700\n",
      "23:55:35 [DEBUG] test episode 92: reward = 19.00, steps = 1759\n",
      "23:56:00 [DEBUG] test episode 93: reward = 20.00, steps = 1729\n",
      "23:56:27 [DEBUG] test episode 94: reward = 20.00, steps = 1804\n",
      "23:56:51 [DEBUG] test episode 95: reward = 20.00, steps = 1660\n",
      "23:57:15 [DEBUG] test episode 96: reward = 20.00, steps = 1665\n",
      "23:57:40 [DEBUG] test episode 97: reward = 19.00, steps = 1758\n",
      "23:58:04 [DEBUG] test episode 98: reward = 20.00, steps = 1661\n",
      "23:58:30 [DEBUG] test episode 99: reward = 19.00, steps = 1829\n",
      "23:58:30 [INFO] average episode reward = 19.40 ± 0.49\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAABJ3UlEQVR4nO29eZgcV3nv/z219DarZM2MlpEsyba8SbJsZINjcGzAGyEIwnJNCHCBG0OCc3OTkFwIWUhy4XJZwy8mBEMM/LiAgRCCiY2NV7xiWbKtxbY2W9tIo5nRSDM900t1V9W5f1Sd06eqq3t6Zrpnplvv53n89HR1ddWpbutbb3/f97yHcc5BEARBtCbafA+AIAiCaBwk8gRBEC0MiTxBEEQLQyJPEATRwpDIEwRBtDDGfA9AZcmSJXz16tXzPQyCIIimYvv27Sc55z1Rry0okV+9ejW2bds238MgCIJoKhhjhyu9RnYNQRBEC0MiTxAE0cKQyBMEQbQwJPIEQRAtDIk8QRBEC0MiTxAE0cKQyBMEQbQwJPIEQRB1wnU5fvTMURRst+b3PL7/JA4MTzRsTCTyBEEQdeKe3YP4i5/sxG0P7a/5PX/24+fx2V/sbdiYSOQJgiDqhIjgj5zK1rS/7bgYnrCw69hYw8ZEIk8QBFEnUjGvU8yk5dS0/8nJAjgHhtIWhtL5hoyJRJ4gCKJOJExPUrMFu6b9VWHfNTDekDGRyBMEcUazf2gC+4dqS3yO54p4dN9Ixdddf83sjOWJ/EN7hpAvOnh03wgm8sXAvtsOncLOYyVhV/+uJwuqCyVBEMRcc92XHwUAHPrsb02579///EX85NkBPPhnv4lzetrLXi86vsgXHOw9MYEPfnsbrlx7Fp56ZRS/tWEZvvqey+S+7/iXp+TfZ7XFsGcwPdtLiYREniAIokbyRc9rf/7IWKTIO24pkrddLwn71CujAICB06VkLPcjfgDQGHDPH78OPe3xhoyZ7BqCIIgaWdfXAQDYfTzaWik6nrBnLFsKvqAjYcq/c8VSYranI46+zgQ0jdV7uABI5AmCIGrGNDwhfuF4tLViK3ZNvhicENWZLBknk1YpMdvXmaj3MAOQyBMEcUZy6GQGg+M5+TxfLC97HMsWsOdEGhP5InYfG4dwWV44Ng7OORyXY+vBU3J/YdE4LodlB4/XFiuJfEYpseztIJEnCIKoO9d84RFc+b8fks/HssWyfb752EG85xtP4/tPH8Hbv/ZkyY4pOBiZtPCrfcN419efwj6/OsdWLJpwJC/eC5SqbwDggqUd9bmgCpDIEwRBwCuPDJMp2Ji0vP8s25V2DABM5m1M5D2xFklV9fXRSStwLNWHF3bNtz9wOf7kunX1u4gI6iLyjLE7GGPDjLHdyrZPMcaOMcae9/97Uz3ORRAE0QiiRN51OVzflgGCkXrGcqSoD6U9QVejdbFNkC04yns9kV+UikFvUMJVUK9I/tsAbozY/mXO+Sb/v3vqdC6CIIi6M5YtlG1zuSfsUuQVEZ9UyiSHfUFXK2qGJ4JtCvIRkXxbvPFV7HURec75owBOTbkjQRDEHLP3xAQefGmobMZpmPFcEaczhUAy1uEcnJci+GAkb8vnQ76gq6+HI3nVrhGJ1/ZmEfkq3MoY2+nbOYuidmCM3cIY28YY2zYyUnm6MEEQxEx4x788iQ99Zxtue/hA1f3Gc0W8+jMPBpKxYtKS6C4ZSJ4WbGnXDPs9aNTXRSTPfDcmyq5pi+szuqbp0EiR/xqAcwBsAjAI4ItRO3HOb+ecb+acb+7p6WngcAiCONPgnJeSo6dyVfcdzxVRcIIVMcJ+ESIfSLxathR1EbUHE68FMAY888k34u2X9SNfiLBrYk0cyXPOhzjnDufcBfANAFc06lwEQRBRBO2T6q18IxOv/tuF+Fe0a9Llds1oxkLc0LCkPY62uB6ya2ykYnrDZrmqNEzkGWPLlKdvA7C70r4EQRCN
QI2shyaqi3xUnbwbjuRdF20xz2KZtBwZ6Z+ctGA7LmzHheELd77oIm54+yZNPWjXFOw5SboCdWpQxhj7AYBrACxhjA0A+FsA1zDGNgHgAA4B+HA9zkUQBFGJwfEc+jq8PjDD6TwM3YtjdY1hKG2Bcw7GoqPn6EjeE3FLsWsMXUMq5kXjGjP8/YDRTAG2y5GKeYJuu1z2l0/GdFi2i6F0HotSMUxazpwkXYE6iTzn/N0Rm/+1HscmCIKohaOnsnjd5x7Gn163Dr//urW44jMP4saLlwIAVnQnceRUFuO5IrpTscj3R4m8E7Jrio4LjXmljxnLRtIsJU6H0nnYrgtT19AWZxjPFZEwS5E8ALz6Mw/iXZv7kbHsOUm6AjTjlSCIFmHgtJdYffzASZz2a97ve/EEAE/kgfKyRkHc0CJ717iyusZ7zXY5NMbQHjf8OvlgyaTtcOgak1F63PAkNhUrCfoDLw1j0rLnJOkKkMgTBNEiCH/c0JisXjF9u6Z/kRD5aF++O2UGPHNB2JMvOi40jaEtrnuJ18AM1zyKDvcjeU/URSSfUCL+zoThR/Ik8gRBEDVT9GefGromrZe4L/IrphD5rqQZqH4RyEjeKXWX1Jg3iSljeb57e9yAxrxaedt1YehMCnhCJF6VSL4jYZLIEwRBTBdRSWNqTFbKmL5dIuya4Ylou6Y7GQvUsQtkCaWSeNUVu6bouLJMcihtwXZDdo1Zbtd0Jg0/8UqePEEQRM04MpJnMpI3da+Spj1uoDNhyJmpYbpSJjIFu2y7sGvE2q1F1wVjXqSeKXirP+kaQ19nAkMTediOC1PTpN8uSihVuyZpelYPefIEQRDTQAixoZXsmpgfyRu6ho6EKWe/qmusAkB30oSSQ5Wvu7x8xqumlaprhAff1xmXideAXSMj+ZKgZwsOckWH7BqCIIjpIBOvOsO4X12j+zXxpu5ZKCJaDy2/iu6UGXguXhcllJaSeNUD1TWeB9/bmfA9ea+Ovj2UeFVLLU/6febnqk6eRJ4giJZA9JHRtZJdI1ZnEhUvovtjeJHtcO28eJ1XKKFsixnIF11YRdezazoSGM0UkCs4MLRSJB9VQnly0rsBUSRPEAQxDdQSSiHyWT9yF8IrSivdkF3TlQxG8uJYskGZUl3DWKl75HiuCFPz7BoAOD6eC4h8VAmlqOGnyVAEQRDToCjtGg1jvsiLskjPQjFki99wJB8WebEYSNiTLzpuoHpmLFf07Rpf5MdyMP1zAaonXxJ0cX8hu4YgCGIaCCE2lUheJGNNPxkqRV6J5BkDOhJBwfU1vvTo7247nl0jSiOzBVsmdcV+auJVVNe0xQ386MNX4u2X9ctzkF1DEAQxDYTIq5OhBIamyWQpUCqNBICYrgWqX4DySF7dzhiT4p2xPA9eTawaGlMSryWJvWLNYvR0xOVziuQJgiCmgeUnRw2NYTzUNjhm+K0ICg64sjA34CVHVTsFUDz5kMgXHQ5d824MgB/JayzguRuaVubJC9TzUCRPEAQxDUQkz1HeUVIIr+NyWLYbEO+YoZeJsWg8Fi61tB03ZNc4MPXgTSJo1wQlVo34KfFKEAQxDYTIT+SLge6QgCe8wh6ZtGzptQOeECcrRPKuG7ZrPE9eRPKAV7IZtmtEIjccrSeU8zRVP3mCIIj5RkxYErXxKqZeajWQsWzoyrJ7cUNDyqwg8mWevNegLKZE6KbOAjcJQ9ewdkkbbvvdS/HGC/sC7xfn0Vgwqm8kFMkTBNESiEhePKqotevhSD4WEclXsmscP5IXiVfv2Brihgax4JSpMzDG8OaNy8tsIHGetphRcYWqelMXkWeM3cEYG2aM7Va2LWaM3c8Y2+8/LqrHuQiCIKIQideCExHJG6Xa9YzlBDz5uBEUaaAUwYftGgDQNBaI5A1f1EVkbmiVZVWK/BxZNUD9IvlvA7gxtO3jAB7knJ8H4EH/OUEQREMQ4m5HibxWWsgjY9mB6pqYoQVE2jtGtF0DeFaL
mlAVC3eL5KtqBYUR55irpCtQJ5HnnD8K4FRo8xYA3/H//g6At9bjXARBEFFYvhcvxD6QDFUSrx/49jP48gP75GtRTcQqlVAC8O0aNZLXAscR7Y2jEOeYq6Qr0NjEax/nfBAAOOeDjLHeqJ0YY7cAuAUAVq1a1cDhEATRysjFtm1PmBOmVmproHjyAHD3zkEAwAevWoPfvmQZAN9KyXivi8lQERoPPWTXCFGXdo1eOXZONbFdM2M457dzzjdzzjf39PTM93AIgmhSRHWNFYrkDY3JhT7CvHrtYly6alFgf6Bk04R73AAIzHj1jh/sT2NUsWsSZmuJ/BBjbBkA+I/DDTwXQRBnOLLnu/9Ysk88mWuLlfvgupJtVStspvLk1UheePBC+KslXsWNYC7tmkaK/F0A3u///X4AP2vguQiCOMORJZR+JF9aFYr5jyW5W+QvEqImSaM8+ajqGp0x6BqTEbspjx98jKJUXdNkiVfG2A8APAXgfMbYAGPsQwA+C+A6xth+ANf5zwmCIBqCKKEUi4cIQTUjPHJxQ9AUkVdbEzg8uk4egKxvV5cWVM9TLfGaMHS/H32TJV455++u8NIb6nF8giCIqSiE7Rqj3CP/ys2b8Of/tlNaO6pd85HfPAcXL+/CbQ8fkJOhoqtrvMeYoSHrrwQFlERer2LXaBrDZ962AVesWTyja5wJ8554JQiCqAeWtGtK1TVAMJLfsmkFNp+9SIq4qsevXnsWbrh4KQDAcYLL/6mUPHg/kteCEXy1SB4A3n3FKpzT0z6NK5sdJPIEQbQEpbYGnm1TqW5d9eH1UGsBIfq2W7m6Riuza4KRfLXE63ywsEZDEAQxQ0qevCfMlerWTb28MkYgBNqt4slroWoaUw/bNTO/hkawwIZDEAQxM8LVNXEzum5dfa5p0VG+XaELJaB48iEPXoh9RFeFeYVEniCIpsd2XBl1C4slaUZX16jPwzcAIfKOWP6vil0TN8NevPe8uMBUnkSeIIimx4poLywSr+G6ddWi0Vh0lF+aDFV+LunJ6+HEK4k8QRBEQ4jqIS8Tr6FEqCr6YU9ePJdtDaYooQRQNikqqtXxfEIiTxBE01MtkjeNoJCrol+eeA168lEllNKuEXX4IbtG/ApYKNDyfwRBND1RkXylRTx0vbJdIxKxD+8ZxniuGF1CGa6T98VdRPbh9WXnGxJ5giCaniiLJF6hTt7UKts1IpJ/4KVh7BgYr+DJ+8f3RV0c7wNXrcbh0Sw+9No1M7uIBkEiTxBE0xNV6pioEMmrdfPhyVCq6FdKoIYnQ4n3dCRMfPFdl0x36A2HPHmCIJqeKFslYURX16jPw5NTVZGPsoDUfYTIRzVAW0gs7NERBEHUgBB5VaRFJB8LibBRxa6pReRZyK6p1lp4IUAiTxBE0yNEXvXfRavhskheq2zXqK9VSqBWsmsWKiTyBEE0PUKQ1ahdthoum/Faua1BLXodXgmK7BqCIIgGIxKv6rJ8sk6+zJKpHMkzf9Wnaoi3hCdDLVQaXl3DGDsEYAKAA8DmnG9u9DkJgjizEBOQApF8xS6UlSN5wIvUoxK58nU5Gao5Eq9zVUJ5Lef85BydiyCIMwwRyZtGuciHRbha4lW8XqhyLvLkCYIgGky+6OArD+yXPeQjPflQl0hBtTp5YGrRDrcanmolqPlmLkSeA/glY2w7Y+yW8IuMsVsYY9sYY9tGRkbmYDgEQTQ7X//VK/jyA/tw59ajAEotgdWoPWZoeNOGpbh8dXA91WA/+fJjTyny/uuXrurGlWvPwvLu5IyuYa6YC7vmKs75ccZYL4D7GWN7OOePihc557cDuB0ANm/evLCaPhAEsSA5OWkBKCVBRSSv2jWGpuGf3/OqsvdOGclHbFMRds25vR34wS2vmd7A54GGR/Kc8+P+4zCAnwK4otHnJAiitclYNgCgLebFqY60a9TmY9HvNau0Ggai2wurLHALvoyGijxjrI0x1iH+BnA9gN2NPCdBEK1P
puCJfMqf8OSE7BpdY2AVInIx4YkxRO4zVavgqIqchUyj7Zo+AD/1P0gDwPc55/c2+JwEQbQ4GcsJPHdCdfLVLBcRvVfaZ6qVncLtiRc6DRV5zvkrABZeWzaCIJqaSd+ukQtuR0TylRB2TaV9puoH32SBPJVQEgTRfGQLQuRd/zEUyVdRYmOKG0G1iVBA80XyJPIEQTQdwq4pOsFIPlZDJG9MYddMBYk8QRBEgxGJV5EkDU+GqkXkZ5pAJbuGIAiiwYgSSse3axzZ1qC63w5MbdeEEYG7eFzobQzCkMgTBNF0CJtGPDp+RYxMvFaxVETitVbbxZTtC0TpJYk8QRBEw8gXS+WTtozkvee12DWyhLJG9RPHrOXYCxESeYIgmop0rij/Fl68sG1qqa6pJdoP7s8Cj02m8STyBEE0F+OqyAu7xp+/JAS82kIe0028kl1DEAQxhwRF3rdr3KAnX03ARVuDWm2XsMjPtPRyviCRJwiiqTiRzsu/i24wkq9lST6xsHetYi2OKR6j2hMvZOZqZSiCIIi6sOvYOGK6BpdzOTtVllDWUDkjRL52uybsyVMkTxAE0TB2DYzjwmUdSMZ02UzMcV3oGpMCXDXxqk038Rq0a0jkCYIgGoTrcuw6No4N/V0wdS2QeK1V5PVpR/K+BUQiTxAE0VgOn8piIm9jw4ou6Bor1cm7LnTGZO17TZH8NOrkGVOqcppL40nkCYJoHvYNTQAALlzWCVNjgUjeqDGSnyrxumlld+D5ikVJLOtMSHFvtkVDSOQJgmgaxESo7mQMhq4FJkNpGptyQRBg6jr5H3/kSjz1idfL5++78mw8+GfXyBsI2TUhGGM3Msb2MsYOMMY+3ujzEQTRusi1XeM6DJ2VEq+cw1BE3tArCzFj3n6VbgSmrqEtbgSeJ2O68iuhLpcyZzR6jVcdwFcB3ATgIgDvZoxd1MhzEgTRumQKXt+atrgBI2DXcGiKXTNVtG1obIoJU+qC4EELiGa8BrkCwAHO+Suc8wKAOwFsafA5CYJoUSYtG4bGEDc0GJpq14Qi+Sl8c1PXpmh9UJJG8afQdrJrgqwAcFR5PuBvkzDGbmGMbWOMbRsZGWnwcAiCaGaylo22uAHGGEydBZb/05gSyU8h8rpyQ4hCvQHooUie2hoEifo0Agsocs5v55xv5pxv7unpafBwCIJoZiYtB+2+X64rdo3r8oBwTx3Js6oRuaYxZbGQoAXUZMU1DRf5AQArlef9AI43+JwEQbQoGctGW1wH4E1OEolX27drai1zNDRtygZlZqiRmRB58uSDPAPgPMbYGsZYDMDNAO5q8DkJgmhRMgVbVr6YOpO9a1zuJ15rjOTV2bHV9gFKkbt4bLZFQxraoIxzbjPGbgVwHwAdwB2c8xcaeU6CIFqXScuWdo2haZh0vWobmXhltfnmps6mLIU0QhF8WPSbhYZ3oeSc3wPgnkafhyCI1idj2ejrSACAX0Ip2hp4iVcpxFMocXcqhq6kWXWfcLfKZrVrqNUwQRBNQ8ZypF1jKHaN43IYSjJ1Krvm6+99FeJG9VBeD3WrFGJPdg1BEESD8Oya6MTrdCL5vs7ElOcKNyQLPzYLTTZBlyCIMxXOuV9dIzx5JidDubKtAeRrsyV8w9Cpdw1BEETjsGwXtssVkS/1k7ed6bU1qIXwSlDhevlmgUSeIIimQDYni3l2jTrjVUTytXrytVCphLLZ1nhtsuESBHGmkrFKzckAL/EqI/nQjNd6JEdF/5pwwpXaGhAEQTSAST+SV+vkReJVtDWoZdGQWjEq2DXNVkJJIk8QRFOQKYhe8uWJV9vl/vJ/9Yzkg5G7SOpSdQ1BEA2Hcw7O+dQ7thCTVkjk9WCrYU2rbY3XWin1j/ee1/NXwlxCIk8QTUbRcbHmE/fgnx95eb6HUlc++dNdWP3xuyu+nvU9+XYlki/YLjb87X3Yc2IiuMZrHSwVQ49uUEbVNQRBNBSxzunn79s7zyOpL997+kjV1/NF
T+QTpidbwjOf8CN8dY3Xeiy2He5dU/LmZ33oOYVEniCaDFFl0qpYdvT1Ffwka9wQJZRB+Wp0CWU9raC5hESeIJoM4U0DwES+OI8jaQyVbmKWH8nH/J4zYSGvd+LV1DUwFrVoCIk8QRANRFSZAMDuY+l5HEljyCg3MRURyQuRDwt5vUsodaV1MVCygJpM40nkCaLZUCP53cfGp/Xerz58AB/93rOBbXfvHMSWrz5RtVrn2FgOV332IRw9lZ3yHO/6+lP4yfaBaY0LAGK+HzJZQeStorBrvP3Cdo2uMXkDiE3RYbIWVPsHUGybJlN5EnmCaDLUSPd0tjCt9/76lVE8c+hUYNsLx8ex4+iYLEeM4tDJDI6N5XB4tLrIuy7H1oOn8MLx6f/CiPsJ1WqRPGMlm0YkXgW6xrC4LYbbfvdSvHnj8mmfP4yha4EWBtSgjCCIOUEVwVxxeknY4bRVJqJi1qh4jELu41beByhZSZWSp9VImF5CtWIkb7uIG5r0yE2tPJIHgDdvXD7lgiC1EI7kqUFZCMbYpxhjxxhjz/v/valR5yKIM4lJq5SAzBWmJ6ZDE3lkCg5cJWov2G7gMQq122M1RNK02rEqkZCRfIXqGtuVlg4Q7cnXk7AnXyrPrOtpGk6jFw35Muf8Cw0+B0GcUYhIvKc9Pq1IPl90MJb1qnGyRUdOKir4wl2oEsmLbo92lX2AUhRuzUTk/dLISnaNZbuI+9E+EGHX1DnCNnUWSLKWulFSJE8QRB04NpbDRX9zL/aemAhsz1g2YoaGjoQRGcm//46t+Mw9L5VtH5mwAscQ1BTJKz1iqiGOW2sk/60nDuIvf7oLQC12jROI5KMSr/XE1DU569U7fnRVz0Kn0SJ/K2NsJ2PsDsbYoqgdGGO3MMa2Mca2jYyMNHg4BNE83L3zOLIFBz/edjSw3VsCz0AypkdG8r/aN4LbH32lbPtQOh84hqDkyVcW8FJL3yk8eWt6nvwTB0bxyJ5hAKpdUyHxarsyOQs03q5572vOxqfful4+f9ulK/D5d2yUN6NmYVYizxh7gDG2O+K/LQC+BuAcAJsADAL4YtQxOOe3c843c8439/T0zGY4BNFSCG86FTdC2220xXWkYvq0PPmhdHQkL0S+WvRdy40AKN08qlk/KhnLlu8Ric3JQmW7JuDJs8aK/Hl9HbhpwzL5fGlXAu/cvLKu55gLZuXJc87fWMt+jLFvAPjP2ZyLIM40wishCSYtB20xA0lTlx57LVSK5IW4V6uukXbNVInXwvTsmkzBRqbggHMOxz9H1UheqX/Phn7FNJuNMlc0srpmmfL0bQB2N+pcBNGKhPuny+2+XZMwpxnJT5REXq1gEVF3tWRpqaXvVIlXZ8pjBfe34bhcrt8aHpuKJ/KlG174ZtBsk5TmikZ68p9jjO1ijO0EcC2AP2nguQii5chYwV4tcnvBRlvc8OyaKtU1tz20H9964qB8Ppy2ZLVIVOK1aiTvv3Y6W8TvffPpijNfKyVev3DfXnz314cr7u+JvSv/jsKyncBn4YZm6Oo6iXwUDRN5zvl7OecbOOcbOedv4ZwPNupcBNGKCAEMWyQy8WrqyFaJ5O/ZdQIPvDQUeN+iVEz+LajFkxdj2Dc0gccPnMSuCu0UMhVKKH+24xju2VkuAeJGlrFseY5qM15Vu+Ydr+rHrdeei7dc4s1upUg+GiqhJIgFihDicEWLSLwmpojkc0VH9nsBPBFflDLlMeT2Gma8CitF2EOV9p2sEMmPZYsBuwjwVrcSlpSwbcJjU7GKbiCSjxs6PnbD+ej2r4k8+WgaPRmKIIgZIqL0cCSfsRy0x02kTAMF24XjL2IdJldwUEiUxLbouHK6f6C6xvYnQ1WN5L3XxE2lkuceFck7LsdE3ka4/1m24MhtGcuRIl/JrglH8gJRLx+2bwgPiuQJYoGSiYjkRfTbHteRjHn/fCtF89mCXRbJxw0dbTFdJkiBUiRfreyxKCL5YvVIPiMTr6Xj
i573k0q5pHp94m/xa6GSBVWw3cjukmLbVOWdZyok8gSxQBFWhhCvouPifXdsBedexU0y5v0Qr1Rhkys6AeEuOp5ItsUNZCwbj+0fwWfuealsxuveExN4+9eexPvv2CqX3JORfKHUm6bouPjTHz6PA8OT8hxRds14rlTmOVyhjFO1a8T2HUfH8LEf78C+oQn8jzufw6Q/0zeMqJ2fSSuFMwGyawhigSKiYmHXnJy08Nj+kwCAay/oxc4BL/mpirzaE77o8IDYWrYLU9fQHjcwWbBx/4tDuPOZo9LCETeTbYdPYfvh0wCAo6eyOK+vQwpwVvHkT4zn8e/PHcOlq7pxbm+7P+aSXcM5B2MsUMs/lLawtkfsWxq3F8l7YxU3li1ffQIAsLwrgf94/jgABEooBUL4Z9IU7UyAInmCWIA4bikpKUoLhYh98Z2XYF1fB5L+9HrVrgm3llFtk6LvaYtI3iq6KNiuFNWCXZ4DKNkzQbumYLsyclYj6GDVjveeQCQ/MXUkX3R4oEvmyGRppm5UJB8nka8KiTxBLEAm8kWZlCy6JbsGKAldKlYu8uFKHFWAC44LU2doi+vIWDbyvqgLsVVtIYH4lSCjbGHXOFzeHPLK+dWlCcUNRhV5ddZt0JN3As3P1OOo7RiiEq8ykndae4HzmUIiTxALEFUY7dCMVFFNIhplZRVBDE9IVUW+aHPEDN+usRwpzuJmIvx7VWxF6wCZFA1E8kLkS+cIzKT1zz2WC9o1cl9l3JmCDUf5BaHW4as3hmqePEXy0ZAnTxBzyI6jY3hozzD+5Lp1eHlkEnduPYK/fNOFsjmXICDyio0BlKJZEcnnq0TyBdvFz3cch2W7fiSv2DV2+b4AUFS25wvBxKuj/KoQlTvq+SctG11JE+O5orxppP1rWd6VwM93HMcFSzuQihn45mMHA++zXY5UzJvgtf3QaflaMJInT366UCRPEHPIPbsH8ZUH98N1OR7eM4xvPHYQpyOajKl+tfDIC6FIPhkTkXxJZJ2Ifu/feuIgvvvUIRT9EsRUzEC24ATEGVCX+FMi+Qq1+gXblXaPuFlkLBsF20VvR9zb7t8ExnNFJEwN7/uN1RiesPBv2wfw0e8/K6P1s9piyPievLhxHVHaJoxmqnvyN61fhi2bluPjN11Y9hpBIk8Qc4qMfm1HRuZRqy2p9e0iOg978jLxWlAj+XKRH8sVkS+6sByvVW/S1JEvOgGbBSjdRNTxyMRr6LhFx5XvFzeLYX9RklWLU97x/OOMZ4voSpr4yG+eg6vX9SAfirh7OuKyukZYUOovGXWOU5Qnn4zp+MrNl2JpV6LsNYLsGoKYU4SPnS04UkyjJiGpUXaxLJL3rJ1khF3jRoh8OldEW9yQdfKm7vn4lSJ59UYh9gl3nwx48v64hHe+0hd5caMayxVkmWbc0GCFztuZMDGRt+FyoM2v/U/no1soR4k8UR36xAhiDhHRb67gKFUz5cIcbgsAlG4G4Ug+O1Ukny0iY3ltBUxdQzKmw+XARD7YPkDtYZMKWUHhMRaUSF6IdljkRbXLeK6I7qTXGC1h6rBsV0b7ANAW12XkLm5c6Zwte9KoxHSSrOlCnxhBzCFq2aHtBG2Y8WwR/+fePb4V4u2XMLWyLpFC6KLq5KM8edvlSPuCHjM0+b6xbCGwX8EuLQySMHXEdE0eO2wpqfX1IpIf9hOkKxclAZRuVOM5G51+JJ8wtLJfEG1xQyZn2+K+yOeL6EgY8leLQF3+j6gN+sQIYg4RwpctOMqSet7j4wdO4muPvIyXBtNyv/a4KZOeYU9e0xg64kZgRmmUyAPBpK2IljOhdghqJG9oDAlTU+rkyz15Mca8EsknTR1ntccD15rOFdGZ9GyYuOmJvLB6/vsbzkMqpstfFUnT2288V0TC0MsWTInpzbW+6kKARJ4g5hAhiLmio0w+4oHXJq2SX94e12XiNVxdAwC9nXGMTJSqT6LsGhU1kg8jSieLDoepe1U4uUrV
NcqvDSHmQxMW+jrjZTNQvYZqnlgnDB35omf1vP/Ks/Gn161DzNBk/b2I5CctGwlTlx69gCL56TPbhbzfyRh7gTHmMsY2h177BGPsAGNsL2PshtkNkyBaA1Xkw1UzoiQxY5UqX9rihlInHyHyHYnAZKFKkbwgpjMZyYcpTYZyYfj7SbsmlHgt2jzSk+/tTCBhBhuGef3vfZE3dVi2V74pKmnihi7HLXIBnHtJ1vaySJ5EfrrM9hPbDeB3ADyqbmSMXQTgZgAXA7gRwD8zxuh3FnHGI4QvV3BKNoy0PUqiaNkODI0hbmhyP/FetVa8rzMeWIxjSpGvFskLkXc4DI0FVp4KJ14tR53x6pdQpvPo60xIS0VU4BQdXorkTQ0u964l7o9DvR5h13j76jKyF9DCINNnViLPOX+Jc7434qUtAO7knFuc84MADgC4YjbnIohWQEbyhZJdU1pIW7VrvJpxQ9cU797bX41m+zoTGEpbsvvkVCLv2TDlIs+YNwHpu78+jKI/MzYZ05USypAnbyvVNX7HyaG0hb6OeGAGqmhz0BYrRe0CYeuoZZGqqCdMrcyTr7YSFhFNo377rABwVHk+4G8rgzF2C2NsG2Ns28jISIOGQxALAyGM2aITiJzV10TzsLihwdSZtGsKEZF8b2cCBduVJYhhWyVMTNekTaLSHjNweDSLv/6P3RiasDyRN/WKi4QUHFfaNPmigwnLRq7ooK8zIUXbsh3ZhKxNieQFiahIPqbeBHT5C+C/vXYNlnYmcMHSjqrXR5Qzpcgzxh5gjO2O+G9LtbdFbIsMMTjnt3PON3PON/f09NQ6boJoSmRFSqHck7ekJ++1AU6YOnRNC3jyGgtaFn2dXiWL6O+iRtxR61qbhhbpyasRs1V0pCefram6xpWLgfR2hiJ5vwmZEOu4GYzUgeAvEzXRGlci+WvO78Wv//IN6EiU184T1Zlyxivn/I0zOO4AgJXK834Ax2dwHIJoKayI6hpp1/iR/KTleJG8qcHUWGBmbLh3S1+nN5V/KJ3H+Us7AiLfmTAD7QEAIF7BrlFtkkzBRmfClO0PgOp18pbt4MS4JcdTiuTdskhetWaEdaMKv3oDErX64fER06NRds1dAG5mjMUZY2sAnAdga4PORRBNQz6irUG4hNKL5B3EDR2GzgINysxQdUlfR0nkgWAkHzVj1KyQeFWrWDKWH8mbetU6eXEtLgeOjXkNxfo6EzB0DRrzxivWklWrawQiko8r15SKBT17Ie7hKhuidmZbQvk2xtgAgCsB3M0Yuw8AOOcvAPgRgBcB3Avgo5xzypgQZzSOywOCHm5QJuyPyYLXBjhhajB0rVQn76/spNLr2zWiOZgqxt3J6LYAU9k1k5YNw99P9KqP6kKpNlE7POqJvOhAqWsMtz18AM8fGQMApbpGEXkZyZeuydQ1OcvVq64xysZHTI/ZVtf8lHPezzmPc877OOc3KK99mnN+Duf8fM75L2Y/VIJobtSl+HIRM17VSD5fdJAwdBhaKfFajIjkE6aOjoQhJ0Q5fpXNhcs68Zpzziobg6lriPmRtvfcb3amiG/BdmU9vUgGl/WpVyJ5wKvM6YgbUowvXbkIAPBPD+0HUIrQA3ZNhCdvaEw+Txg6LunvxkXLOnFWe6zsWojaoJkFBDFHqK19s8XScneFCLsmX3QRNzUYWqlOPsqTByAX6QAgV1f63Ns34s+vP79s35ihgTGGlJ/gXJTyxDO8gIihaUiZOgqOC9txI+waHrieI6ey8lcFAPzoI1di08pu+b7ISN4sj+R1jUmPPm5quOrcJbjnj18XuVgIURsk8gQxR4Qj+XCDMmnXWF5vl4Shw9RZIOIPR/JAUOSFqGoaYOha2eQhGSX7QipEXl2kBICsrgH82bkhu8ZxOXIFR/4SODyalUlgQZ8i+pEllL5wq/1oDE1TInmSp3pAnyJBzBFq5Kt68kU7yq4RnjwrtRq23chp/d0pJZL39zW0cisEUNoUxzT5
XnFOFVOpp88pNf0q6XxR9okfzxUjRD4hxxCTE5+UxKqwa4xwJB+8ERGzg0SeIOYItcVutmCXLbcXmAwlqms0pdWww2FWsGtE22DhyYsIPtzQS0TeKbNk1zCGslJLQ2PSR88VnMjGZxP5UgthAAG7BiiJfHgWq/zbKPfpDb3kyVMzsvpAnyJBzBHCjjE0hlzRLWs8prY1kNU1mjrj1QmUGwo8u8aLxMUKToYv8jFdk38Dpag54Qv4JSu7ceHSTtxy9drAMU2jVE+fsZyK7RLUCp4V3cnAaz1+pY1aGZMwygU/HMnLMZIPXxeoLokg5ggRyXenYsgV7EDVjPd6yZt3XG/hDsaY0k+eByJhQVcyhnSuCM653FdE8jFDQ3fKxMlJL9IXnn7Kt0Led+XZ+INrzgEA3LRhGa767EPeflopOSuW4kuYGvJFb9at0PwuReQvXt4VGJeI5NUadzU6j0dF8n5TNu98JPL1gCJ5gpgjhMgvbjMDycxwJA94CdS4ISL5Uj/5KE++K2mi4LjIFR24YbvG8PrCi+hYrirlR+mqkAZtk1JLAbEoiSizVCPz7lSptPGiZZ2BcYnEa6VIvpInX/LvSZ7qAX2KBDFHCLvGi+SdgNcOeG0N2tQZn6Y349Xl3gLd1aprAM9Xt2XiVUTyOlIxHQn/hqFppUXATZ0Fqm9iIW9cRODjOe9XgLghqP1l1Eg+PMlKzMZVRV5T6uCFiAfOq2mR7Q6ImUN2DUHMEdKuSZqBlrmilDJvO1jWlUTmlDd7VI2yi65bsU5eVMiM54rSO9eUSB7wBFNNniZNvaz2XP2VYGqllgIikhfjSSmJ1M5EZQnpTpmI6RraQ31n4oYGxgDGxBhLrwc8eUq81gX6FAlijhBtABa3xbzJRHap/l20PFjaVSpDFHYN4LUVqGbXAN5C4E4okl+1OIU1S9qQMLXAr4A1S9qwanEqcBzVHjF1TYnkgyKvRvL9i7xjfOz6dWXjYozhouWdWLOkLXgeU69sEwXsGork6wFF8gQxRwjPXfjYBSnyXL526apubD14CoAnqrILpG/XVJrxCgBjSiQvbJh//C+bwAHc+I+PIm+Uat3/4DfPwUd+85zAcRjzrJSC4y3/Jz35XCnxCgRLIl+1ehH2/a+bZGlmmH/7yJXQQj2PE6YG9V6l3rj0QOKVYtB6QCJPEHOEqJ4Jd4f0FsX2XlumTChSo2/bcSO7UAKVPHlvP02pl48VSu/VtGhRjhueyJs68/rcGBrGw4lXJZJfc1ZbxWMBXgI36hyq8Gsa82f2chg6VdfUG7pVEsQcIatrUsFmW7ZT6s2eMHX0L/LqzUWrYUBE8jw6kvdvGumAJx/cJ2Hoke8NI/YRN4n2uFFu14QSqdMlYeplUbqwZrxIvry0kpg59CkSxBxh2S50jaEjlKz07Bovkk+YOtb1dfj7OzB9sS1WieTbYwY0Bjx/dAwnJ71ulEZI5ROmXtFSUREiL/Zti+sY86trRCQv9jn7rFTEEaYmEfLk1WMamqYkXimSrwdk1xDEHDGWK6AtpsvZpoKCEsnHDQ3v2tyPh/YMY1lXEodOZgD4idcKnrymMSzvTuI/dw6WtoX0fHl3oqaoW0TPwmZpixmyukbUtS9p9+rfP/Abq6c8XhTLu5Mo2MHlJYQvr2sMy7oSWNIej7yhEdOHRJ4g5ogXjqdxwbJOOdtUUAzZNdde0Itn//o6LG6LYeC0V04perfHKkTjd936Wlz2D/cD8ISShZKdf79lvZwoVY1SJF+ya46dzgEoRfLn9rbL8c2Ez79jI8JDETcQQ2N472vOxttf1T+jYxPlzHZlqHcyxl5gjLmMsc3K9tWMsRxj7Hn/v3+Z/VAJonkpOi5ePJ7GxhVdZZOG1EWxhdgJARVim/GX0avkqy9ui+GCpZ7NE24vDHg3j1Rs6phO+OElu8bAhN+hUtgnps5mLPDiOOHPQI3kDV1DJy3YXTdmG8nvBvA7AL4e8drLnPNNszw+QbQE
+4cmYdkuNvR3la2xajtcsWuCr4l6d7HWajULQzQUM2aQDBVEJV4FiYiVnOqFGskT9WVWIs85fwlA2U9DgjiTOJUpwNQZOpTocyJfhO1wLGqLIVuwcc8uzy/f2N9dlgBVSyjDVSeiuua5I6cBVI7kgVJbAX0W/x6FgBtK4lUe3785RZVFzhY1kifqSyMzG2sYY88xxn7FGHtdA89DEPPKB7/9DP7+5y8Gtn3qrhdxy3e3AQC+cN8+3PbwASxui+HsxalAJK9r3spPuaJniYSjfBFRf/H+fQCA7mRlmyTp94jXa6iiqYSIqFW7RrDcbyXciPVWY4YWmUsgZs+UkTxj7AEASyNe+iTn/GcV3jYIYBXnfJQx9ioA/8EYu5hzno44/i0AbgGAVatW1T5yglggvDIyWSbOg+M5vDLiVcaMZryyxp999CpoGgv40amYjqLN5ULcoge7wFAE+6u/exluXB/1T9EjWQ+7Ri9PvApuuHgpHv3zxVg1w9LJasQNnaL4BjGlyHPO3zjdg3LOLQCW//d2xtjLANYB2Bax7+0AbgeAzZs3T53+J4gFRK7gIJ23y1ZWylg2RjMFFGwX2YKDC5Z2YKXfK0Ztt5uK6Sg6LobSFlIxPSCqQNCDv3H90qpCmDRnb3mIzo/iF4SI5Fd0J9EWNwKRfT2JGRr58Q2iIXYNY6yHMab7f68FcB6AVxpxLoKYT4Yn8gDKl88TC2OPTFrIF51A9K5pTHrvSVOIfB59nYkyu0IV7KnEW1TP1MOTD9s1a3vaKr6nHsSN8kXHifow2xLKtzHGBgBcCeBuxth9/ktXA9jJGNsB4N8AfIRzfmp2QyUIb21UKzSRphK240qxrYToyzJThtKezVIeyTv+63nkCk6ZnSMEORkzUHQ4htMWekNWDQAI2atldqkocayHJy+Sq1n/8wt3kqw3FMk3jlmJPOf8p5zzfs55nHPexzm/wd/+E875xZzzSzjnl3HOf16f4RJnOu+/Yyv+9z17atr3jicO4vov/ari69sPn8Jl/+t+HB7NzHg8Q2kvkp+0bNkXHvDsGgAYTueRLTiyvFFQavblR/ITeblcnoqoptmyacWUYwnfSGaCrK7xBVfUw7/uvJ5ZH7sai1OxwKLgRP2gGa9EUzFwOhdYjagaR05lcXw8D9txI8v+jpzKwnE5XhnJ4OyzZhapCpEHgHTexuK2GDjnyBRs/3XPrgn3YZF2TcxbzGNwPI/rLyqP5C9e3oW7br0KG1Z0lb0WRtxIivbMU1vh1Zreflk/zl/agY393TM+Zi380evPw/uuXN3Qc5ypUHMIoqnIFZ3AqkrVEJZJphC9/6RiqcyUYb8qBgDGsgU5RrEI01A6j1yxPJKXdo0v/gXbjYzkAa+2vpbSQtETp6D8opgusneNH8lrGmu4wANeJ81GVO0QJPJEk5ErOHL251QIPz5TwZcX24WvPhPUG4Tw5dU8wFDaQjbCkxfPVfHvrSDytZJSbhgzJdy7hmh+6JskFgyOy8GrNNFyXK8lb7ZGkc/UKvIT+YCfLs5VC0PpvPSxT2cLcFwuf0EAXvVNruiUdZ4U1TYppSSxLyLxOh2SdYnkxaxWSoK2CiTyxIJgIl/Epr/7JR58abjiPqK/S75mu8YT8UoVNmL7z3ccx7mf/AV2DowB8NoUrP/b+/DkgZNTnmM4bcnywg9+exv+5IfPy/OaOsPxsRwKtouUGUx/iUhezS+IGaUzJVmHSL4z6fWmD4+XaF5I5IkFwciEhQnLxgvHyyZFS0QEX6snX7JrovcXYjyR9x53H/POPXA6i1zRwf7hySnPMZTOy0U+AODxAyfleZd3JzE47tk5yVjwn5qIut+6aQW++M5LcPt7XyUnS82UcGfHmfCWS1bghx++Uq42RTQ/dLsmFgSyrnyichJURPC12zXefpUi+bD4iwlAwlsP176HmbRsZAoO1vW1y22nMgXsG5oAACzvSuLwqNcPvsyTj5Ui+Xr1Tq9HCWUypuPy
1YvrMBpioUCRPLEgmJR15ZWToLkZ2jWVPPmw+IfFfSqRF0nX/kXBCPypl0cBBO2XZCzarqmn912PSJ5oPUjkiQWBnDxUJZIXEXzR4ShOkVxUa9XFY6VzCmYq8r2dpYSpqTP8+hVP5FcsUkS+QnVNPatY6hHJE60HiXwEn79vD/7g/26v+Pp9L5zA1Z97uObp9c3MR767HV+4b2/ka195YD8+8K2tdTlPafJQZZFXSyer+fLfePQVbPnqE7JW/Qdbj+Kazz9clpCctOxAf3Yh6mJN07GIlgdv/qfHcOfWIwBKvzp6O0qlj+cv7cBp/30rukvbw3XyIsEZr9IffrpQJE9EQZ58BDsHxvFylaTbc0fGcORUFifG8zOeKdksPH90DNkKgrrt8KmqidLpIBt6TVhwXB7ZrEq1afIFp+IScdsPn8bOgXH5/KVBb4z7hiawXpk5minYuP6iPlx/8VJ8/r49UuTToUeBZTvYfSyN3ce9Y4sbUl9nHD+/9bVImBrueOKQTOAu7SpF8uEZr+/avBLr+jrKts+G8I2EIACK5CMZyxYxVuWn+rD/j3s2k2iahbFcAeP+TM4ww2kL47li1dr2WhHWicuB0cnozzVbYyRfKXmrCr93TgddSRNvuWQ5FqViMnKvZNeEI321PfCG/i6c19eBjf3eTSQV07E4VVpcIxxld6diuOb83orXMBMSBok8UQ6JfATjuSKyBaei7ytEZDbT4ZsBy3aQL7oVvemhiTwcl0/Z6bEW1EqXSjdPVdirVdhUSt7uOjYWeD5p2bJ/e1fSLLdrcsGbWzok/qKpmNpyQPSYaYsbgRr4uYiyNeriSETQMiLPOYdb4yzFqZgq8SZEqNVFvtrnkC86ZZHvbMgEWgFEf645JYFaKZJ3XV4xeatG8kXHRcF2Zb/0TkXkp4rkhdgPp/Nl7YHX9XUgpmtoD4k8JUWJ+aIlRP65I6dx/l/di1/sPoE3fPER7Ar9LJ8OrsuRzldOvAElEVKbU6kcG8vhms8/jGePnMbrv/AInnz5JK7/8q+w9WDllvp/+L3t+Majc7euSr7o4KavPIYnXy7N6uSc4923/xp3bj2C//adbfjKA/sBeOL2Nz/bjc/dW2rxOxJozFVZ5LcdOoXrvvSrMsEcSudx7RcewcsjXu4jU7BlU6w//P6z+MWuQbz9a0/iZ88fwwe+tRV3bj0SEPZw/5ov/XIv1v3VL/Curz+FolO62ctGWwx44Xga6/7qF7jqsw/hhD9JSYh8d9LEwZMZXPP5h/H0wVH/M3KRzhdx7RcewYV/fS9+sPWod725kl0TbioWMzRcuKwDHQkDHQkDIsivp/dOENOhJRKvi1IxFBwXjx84iZdHMth++BQ29E/dmjWKibwNYTFHRajZgi1nSFaKOJ/YfxKHRrP48bYBvHIygy/+ch/2DU1i2+FTuGJN+UQTzjke3jOC42N5/P7Va2c07uly9FQWLw2mse3QafzGOUsAACfSeTz1yijytoPnjozhLL+XuMuBu3cOYnl3En9xo/f+QIvdKpH8tsOnsX94Es8dOR3woF88nsbBkxnsODqGc3raMWk5WLk4hQ9ctRqfv28vvvn4QWw/fBrn9rTjkX0j6EyaWLukNOkoLPK/PngKBdvFtsOnA9t7OuIYHM/jQ69dg7ih4/h4Dv/+7DE84bcsaI8H2wsc8icvCZ48MIqDJ71+8w/v8VouiDyEt5pTeb+Zv9uyHlbRgaYxdCa8XwhzlRT97oeuqNjNkjgzaQmRF3XKwnMdqhBh14Iq7FHipfq9lUR+pz8OMZ7tvvBU8oonLBu5ooMXB9MoOu6cdACMspyEnfHckTEAwGim5EmPZgqBihfVN69m14jj7xoYD4j8UCh5nbFstMV1vO/K1XhozzAe2TsCAHhhcByce/sv7SqJV9iuGU7nETc0WKEyyd7OBAbH87hkZTfevHE5xrNF/Puzx/CUX8vepnjygfd1xDE8YeHxA9444oYmP490rojx
XBFWhfbAm1Z2y7+F1z9XkXyjF/cgmo/ZLv/3ecbYHsbYTsbYTxlj3cprn2CMHWCM7WWM3TDrkVYhFfN+Gu894U0nn41XrgpWlHiJY3enzMoJPl8sxXjC7w0jqnUKtiunxDeasMgCmNLmOjlpyW6NUS12oxCf0c5jwWOHbzKTlo02f1boRqXMUXyGw2kL+UK0XeNF1RbeeFGf3Nbt914RnR2FGHelTKw+KyVnpUq7JtSrZZXfR+bx/SexpD2OS5Se6i4HXh7xovup2gN3p0xav5SYV2YbMt4PYD3nfCOAfQA+AQCMsYsA3AzgYgA3AvhnsbB3o+jrTEgvttrU+KlQKyrGIkoHhQ+/YUVXpGgXbBcv+cKkesNAZZGfjtDWC1EhpCYpdx4blysWReHyUnQ/PGFB6Fa1clNxzbvDIh86f0apdNmgCKr4DMXiG2LykhrJi19Cl/R3YUm7J+rrl3s3CvErr0+ZsLShv1t+j+Kc7fGgyLcnvO2HRrPY2N+Fvq6gmIub8VTtgbuSJk1SIuaV2a7x+kvOuSh5+DUA0WlpC4A7OecW5/wggAMArpjNuaZC9UaH0nlYtoO33PY4XvOZB/HQniF86NvP4N7dg7j1+8/iJ9sHAAB/9R+78K+PHwQAfOG+vfjSL/cGotLbHn4Zl3/6AVz+6QfkzE4hWhv7u5ApOHjq5VH8zj8/IZO1+4YmAjMr1QV9htLeRJ/3/uvTeHjPMH7//9+Gu3cOymMyBvz9f76Irz58AADwuXv34PJPP4BP3fVCxet+aM8Q3vuvT8NxObYfPo13/suTyFaYxq8iboTHx3L4vW8+jUf2DmPXwBhuWr9MNuqKovQLII9lXUkYGsO3njiIP/rBcwCAB14cwvvv2CornYYm8mAMGBzPY3gij6Onsthy2+OyrW/QrjHkZxsmU3AwMmHJPIEQ+X98YB8+9qMdALwb/cb+LixKmVi5OIlUTJcTptTWA+ovBfHrwXa972yRH9GrHvr6FV1lYi5FfopIvitpysU8CGI+qKcn/0EAP/T/XgFP9AUD/rYyGGO3ALgFAFatWjXjk6uR2lA6jxePp6XH/M3HDuLJl0eRLTh46pVRjOeK+O1LluNH2wZwXm87PnjVavxw21HojOGP3nCuPM7JSQvLuhJYuTiFh/eOYHgijz0nJrCkPY5NKxcBAP7pof149sgYth8+jWvP78UuP2LdtLIbzx8dw2WrFuENF/bi0MkMfvrcMRwYnsRj+0/CcTmefHkUGoM81j9sWY9vPPYK7nr+OD567bn46XPHMDJh4SfPDuBvf/uiyCXg/nPHIB7bfxIHT07i3t2DeOaQN9vzNWvPqvp5CbE+OVnA4wdOwuUcp7NFvOrsRXjtuUtweDSD/++hAxHv80R5z4kJnNvbDst2MJS28PMdx/EPWy7Gz3cex6/2jeDgaAZrl7RhKG1h08puPHdkDLuPjWPgdA47lF8rJbvGkSLf15nAP2y5GIdHs/imfxMGgMOjWXSnYhgcz8s6+R9vG8CxsZx83x+/4TwcOZXFur4OXLFmMS5duQirFqcCnviWTcsxcDqLVNyQHSRvWr8MR6/P4n2/sRr/99eH8Z4rzsa5vR04nSngXZv78YtdJwKfw/4hryqoNyLxqnLL1Wtx7HSu6j4E0UimjOQZYw8wxnZH/LdF2eeTAGwA3xObIg4VWcTOOb+dc76Zc765p2fmSSPVG03nbTxzyCtXPLe3HU/6/qtItu06Ni4j7r0nJnDkVBYjExZOpPPyH6+I6G64eCn+7Lp1ADzLYdfAODas6JTRpji2sFl2DoyjM2Hg8tWecC/rSuAPrzkXFy7rRNHheHTfSNn7htJ5dMQN/N5rzsaWTSuwf3gCR0azGBzP49zedkzkbdmyNozwuncOjMubWi2WT9g6EuPZ2N+Ft7+qH9de4CVJl7THyt6XLzrYNzSBDSu6Aq0FdvmfjxhDOmejYLt4/fm9YCw4RsFw2vKaiVm2rHQBgPdeubqs
EungaAZtMR1JU0e+6ODkpCUFHvBE/pKV3fjtS5bj/KUdeNul/Vi9pA03XxEMHno7E/i7LevxP2+8QC7wHTM03Pr689CZMPGH15yLrpSJP71uHf7hrevRvyglxVx8HnuHJtCRMORarZXY2N+NmzYsq7oPQTSSKUWec/5Gzvn6iP9+BgCMsfcDeDOA9/DS/PYBACuVw/QDOF7vwasIu6bD91IfeGkYS9pjuOHivrJ9x7JF3LNrEABguxw/2nZUvvb4gZOIG5osk9zY34WLV3SBMeDpV05h//AENvR3o68zEbCIpMAeG8NG/3VvXMHH+18aCozl+Lj3q0OIyMYVXXA5cOczXhOs97zaE6hw4hLwkpWiznzH0THpe0ftG2YobcnPSmDqDOcv7Sgbd0zXkIrp0JiXJH5xMA3H5djQ3yVtKgB44sAoXvHLDXcOjEvffU1PG87taceugfHADagjYaDguBjNFJArliJ5gRiDGCfnXnuAZExHruDIX02C8MSkeiLGIhKyIxPlNfIEsRCZbXXNjQD+J4C3cM7VUPMuADczxuKMsTUAzgNQn3aFFRD/4MS08q0HT2HDii650rxYh1M8fu/pI4G/dY1BY8CB4Ul0JU3Yvqe8sb8L7XED5/S0485njsLlJU93w4rSsXcdG0O+6GDviQls6O9SRDIeeNx68FTZWLYeOlUav/8L4XtPHwFjwO9c2o+YoWGX72GrvHDMKy+M6Rp+tuM4MgXHG0vEviqce7NCxWclxnHB0k65xmePL5jdKROdSRPLuhJY0h7HUNqSQr2xvwsnJwvyGD/wuzOKz6PUwCuBDf1dePqgd5MU5xPnF3Xo7RVEfoPioSdNL5LPFhzsGhgHY96Ep464UXaTqCdiLGpDuqgaeYJYaMz2X8VtAOIA7vf94l9zzj/COX+BMfYjAC/Cs3E+yjlvaF9e8Q9uQ3+XtB429HdLW+WG9Uvx8x3H8YYLe/HAS0MYzxVx1blnYc/gBEYzBVywtAMu59g35Im8qL5Y40/A2biiC//+3DF5DsATuQdeGsJNG5biZ88fx43/+CiKDsfGFV04y6/yEG1o1Xa0N65firt2HMf1F/fh7l2D4LwUhYpfCENpC+f1tqMrZeLCZZ34wdajsnZcIKJocTwAcizXfelXFT8rl3MUHS4/K/EedQKZqWtY0h5DV9JEV9JAb0cCqZiNu3cN4r4XT2BJexxLlUj29Rf04t4XTsgx3L1zEH/+453+tce9z+/ZY4Exbljhnf+/+0nbsEgvaY+BMWDNkjbsODqGTMGRkfwvXzgBMGDtkjYkTL3mJQFnSulm7f2yKThu4DsliIXKrESec35uldc+DeDTszn+dNiwohsfvnotfv91a5G1HIzninjbpSuwtDOBj12/Dm+8qA8bV3Th8jWLcdmqRXju6GncfPkqHBvL4bH9I/itDcvBwXHPrkFcc34v+ruTOHo6K+ub33vl2bAcF6vPSsmo7u2v6ke24OC9V54NjTFYtoPLVy/G69b1wNQZbrl6La71JwCt6E7ig1etwcikhVtffy7OX9qBa87vwYXLOvHi8XTAN/6z687HI/uGcdN6z8v96DXn4D+ePxZ53ef0tOPNG5eDA+hpj+O//sZqAJhyUY31K7rwe68+G3FDx9suXYHzetvxhguD1tZf3HABVi5OYXgij86kiYm8jXt3ezbX1ef1gDGGH334SuwbmsCFyzqgawzn9LbjTRuWwnE5XM7R25HAykUp3LRhGXYMjMPUGf78hgvQ0x7HLVevxXiuiHS+iFfrGq5eF8zJGLqGv/qti3DF6sVYs6QNzx45jXdtXomB01n8ys9t3Lh+GVKmXnFhkHqRihn4xE0X4Jrze9GRMLzv7PKVU7+RIOYZVo82sfVi8+bNfNu2bfM9DIIgiKaCMbadc7456rWWaFBGEARBREMiTxAE0cKQyBMEQbQwJPIEQRAtDIk8QRBEC0MiTxAE0cKQyBMEQbQwJPIEQRAtzIKaDMUY
GwFweBaHWALg5JR7NQd0LQuXVrqeVroWoLWuZzrXcjbnPLKN74IS+dnCGNtWadZXs0HXsnBppetppWsBWut66nUtZNcQBEG0MCTyBEEQLUyrifzt8z2AOkLXsnBppetppWsBWut66nItLeXJEwRBEEFaLZInCIIgFEjkCYIgWpiWEHnG2I2Msb2MsQOMsY/P93imC2PsEGNsF2PsecbYNn/bYsbY/Yyx/f7jovkeZyUYY3cwxoYZY7uVbRXHzxj7hP9d7WWM3TA/o46mwrV8ijF2zP9+nmeMvUl5bSFfy0rG2MOMsZcYYy8wxv7Y396s302l62m674cxlmCMbWWM7fCv5e/87fX/bjjnTf0fAB3AywDWAogB2AHgovke1zSv4RCAJaFtnwPwcf/vjwP4P/M9zirjvxrAZQB2TzV+ABf531EcwBr/u9Pn+xqmuJZPAfhYxL4L/VqWAbjM/7sDwD5/zM363VS6nqb7fgAwAO3+3yaApwG8phHfTStE8lcAOMA5f4VzXgBwJ4At8zymerAFwHf8v78D4K3zN5TqcM4fBXAqtLnS+LcAuJNzbnHODwI4AO87XBBUuJZKLPRrGeScP+v/PQHgJQAr0LzfTaXrqcSCvR7uMek/Nf3/OBrw3bSCyK8AcFR5PoDqX/xChAP4JWNsO2PsFn9bH+d8EPD+5wbQO2+jmxmVxt+s39etjLGdvp0jfkI3zbUwxlYDuBRexNj0303oeoAm/H4YYzpj7HkAwwDu55w35LtpBZFnEduarS70Ks75ZQBuAvBRxtjV8z2gBtKM39fXAJwDYBOAQQBf9Lc3xbUwxtoB/ATA/+Ccp6vtGrGtGa6nKb8fzrnDOd8EoB/AFYyx9VV2n/G1tILIDwBYqTzvB3B8nsYyIzjnx/3HYQA/hfczbIgxtgwA/Mfh+RvhjKg0/qb7vjjnQ/4/SBfAN1D6mbzgr4UxZsITxO9xzv/d39y0303U9TTz9wMAnPMxAI8AuBEN+G5aQeSfAXAeY2wNYywG4GYAd83zmGqGMdbGGOsQfwO4HsBueNfwfn+39wP42fyMcMZUGv9dAG5mjMUZY2sAnAdg6zyMr2bEPzqft8H7foAFfi2MMQbgXwG8xDn/kvJSU343la6nGb8fxlgPY6zb/zsJ4I0A9qAR3818Z5nrlKl+E7xM+8sAPjnf45nm2NfCy5rvAPCCGD+AswA8CGC//7h4vsda5Rp+AO9nchFexPGhauMH8En/u9oL4Kb5Hn8N1/JdALsA7PT/sS1rkmt5Lbyf9DsBPO//96Ym/m4qXU/TfT8ANgJ4zh/zbgB/42+v+3dDbQ0IgiBamFawawiCIIgKkMgTBEG0MCTyBEEQLQyJPEEQRAtDIk8QBNHCkMgTBEG0MCTyBEEQLcz/A5df4kDEfQ2rAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "# Training stops once the mean reward over the most recent TRAIN_WINDOW\n",
    "# episodes exceeds TRAIN_TARGET_REWARD.\n",
    "TRAIN_WINDOW = 5\n",
    "TRAIN_TARGET_REWARD = 16.\n",
    "\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    # Require a full window: otherwise the average is taken over fewer\n",
    "    # episodes and a single high-scoring early episode could end training.\n",
    "    if (len(episode_rewards) >= TRAIN_WINDOW and\n",
    "            np.mean(episode_rewards[-TRAIN_WINDOW:]) > TRAIN_TARGET_REWARD):\n",
    "        break\n",
    "\n",
    "# Learning curve; labelled so the figure can be read on its own.\n",
    "plt.plot(episode_rewards)\n",
    "plt.title('Training episode rewards')\n",
    "plt.xlabel('episode')\n",
    "plt.ylabel('episode reward')\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "# Evaluate over 100 episodes with the default mode (mode=None).\n",
    "# Note: `episode_rewards` is reused and now holds the test rewards.\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
